1 year ago · 7185200e63
--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
@@ -159,6 +159,7 @@ class AudioToTextRecorder:
 
															                  sample_rate: int = SAMPLE_RATE,
														
 
															                  initial_prompt: Optional[Union[str, Iterable[int]]] = None,
														
 
															                  suppress_tokens: Optional[List[int]] = [-1],
														
 
															+                 log_transcription_time: bool = False
														
 
															                  ):
														
 
															         """
														
 
															         Initializes an audio recorder and  transcription
														
@@ -429,18 +430,25 @@ class AudioToTextRecorder:
 
															         self.detected_realtime_language = None
														
 
															         self.detected_realtime_language_probability = 0
														
 
															         self.transcription_lock = threading.Lock()
														
 
															+        self.transcribe_count = 0
														
 
															+        self.log_transcription_time = log_transcription_time
														
 
															         # Initialize the logging configuration with the specified level
														
 
															         log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
														
 
															+        file_log_format = '%(asctime)s - ' + log_format
														
 
															-        # Create a logger
														
 
															+        # Get the root logger
														
 
															         logger = logging.getLogger()
														
 
															-        logger.setLevel(level)  # Set the root logger's level
														
 
															+        logger.setLevel(level)  # Set the logger's level
														
 
															+        logger.propagate = False  # Prevent propagation to higher-level loggers
														
 
															+
														
 
															+        # Remove any existing handlers
														
 
															+        logger.handlers = []
														
 
															         # Create a file handler and set its level
														
 
															         file_handler = logging.FileHandler('realtimesst.log')
														
 
															         file_handler.setLevel(logging.DEBUG)
														
 
															-        file_handler.setFormatter(logging.Formatter(log_format))
														
 
															+        file_handler.setFormatter(logging.Formatter(file_log_format, datefmt='%Y-%m-%d %H:%M:%S'))
														
 
															         # Create a console handler and set its level
														
 
															         console_handler = logging.StreamHandler()
														
@@ -450,15 +458,16 @@ class AudioToTextRecorder:
 
															         # Add the handlers to the logger
														
 
															         logger.addHandler(file_handler)
														
 
															         logger.addHandler(console_handler)
														
 
															-
														
 
															+        
														
 
															         self.is_shut_down = False
														
 
															         self.shutdown_event = mp.Event()
														
 
															-
														
 
															+        
														
 
															         try:
														
 
															-            logging.debug("Explicitly setting the multiprocessing start method to 'spawn'")
														
 
															-            mp.set_start_method('spawn')
														
 
															+            # Only set the start method if it hasn't been set already
														
 
															+            if mp.get_start_method(allow_none=True) is None:
														
 
															+                mp.set_start_method("spawn")
														
 
															         except RuntimeError as e:
														
 
															-            logging.debug(f"Start method has already been set. Details: {e}")
														
 
															+            logging.info(f"Start method has already been set. Details: {e}")
														
 
															         logging.info("Starting RealTimeSTT")
														
@@ -706,9 +715,28 @@ class AudioToTextRecorder:
 
															     def _read_stdout(self):
														
 
															         while not self.shutdown_event.is_set():
														
 
															-            if self.parent_stdout_pipe.poll(0.1):
														
 
															-                message = self.parent_stdout_pipe.recv()
														
 
															-                print(message, flush=True)
														
 
															+            try:
														
 
															+                if self.parent_stdout_pipe.poll(0.1):
														
 
															+                    logging.debug("Receive from stdout pipe")
														
 
															+                    message = self.parent_stdout_pipe.recv()
														
 
															+                    logging.info(message)
														
 
															+            except (BrokenPipeError, EOFError, OSError):
														
 
															+                # The pipe probably has been closed, so we ignore the error
														
 
															+                pass
														
 
															+            # except BrokenPipeError as e:  # handle broken pipe error
														
 
															+            #     pass                
														
 
															+            # except EOFError as e:
														
 
															+            #     logging.error(f"EOFError in read from stdout: {e}")
														
 
															+            #     logging.error(traceback.format_exc())
														
 
															+            #     break            
														
 
															+            except KeyboardInterrupt:  # handle manual interruption (Ctrl+C)
														
 
															+                logging.info("KeyboardInterrupt in read from stdout detected, exiting...")
														
 
															+                break
														
 
															+            except Exception as e:
														
 
															+                logging.error(f"Unexpected error in read from stdout: {e}")
														
 
															+                logging.error(traceback.format_exc())  # Log the full traceback here
														
 
															+                break 
														
 
															+            time.sleep(0.1)
														
 
															     @staticmethod
														
 
															     def _transcription_worker(conn,
														
@@ -762,7 +790,11 @@ class AudioToTextRecorder:
 
															         """
														
 
															         def custom_print(*args, **kwargs):
														
 
															             message = ' '.join(map(str, args))
														
 
															-            stdout_pipe.send(message)
														
 
															+            try:
														
 
															+                stdout_pipe.send(message)
														
 
															+            except (BrokenPipeError, EOFError, OSError):
														
 
															+                # The pipe probably has been closed, so we ignore the error
														
 
															+                pass
														
 
															         # Replace the built-in print function with our custom one
														
 
															         __builtins__['print'] = custom_print
														
@@ -791,33 +823,48 @@ class AudioToTextRecorder:
 
															                       "transcription model initialized successfully"
														
 
															                       )
														
 
															-        while not shutdown_event.is_set():
														
 
															-            try:
														
 
															-                if conn.poll(0.01):
														
 
															-                    audio, language = conn.recv()
														
 
															-                    try:
														
 
															-                        segments, info = model.transcribe(
														
 
															-                            audio,
														
 
															-                            language=language if language else None,
														
 
															-                            beam_size=beam_size,
														
 
															-                            initial_prompt=initial_prompt,
														
 
															-                            suppress_tokens=suppress_tokens
														
 
															-                        )
														
 
															-                        transcription = " ".join(seg.text for seg in segments)
														
 
															-                        transcription = transcription.strip()
														
 
															-                        conn.send(('success', (transcription, info)))
														
 
															-                    except Exception as e:
														
 
															-                        logging.error(f"General transcription error: {e}")
														
 
															-                        conn.send(('error', str(e)))
														
 
															-                else:
														
 
															-                    time.sleep(TIME_SLEEP)
														
 
															+        try:
														
 
															+            while not shutdown_event.is_set():
														
 
															+                try:
														
 
															+                    if conn.poll(0.01):
														
 
															+                        logging.debug("Receive from _transcription_worker  pipe")
														
 
															+                        audio, language = conn.recv()
														
 
															+                        try:
														
 
															+                            segments, info = model.transcribe(
														
 
															+                                audio,
														
 
															+                                language=language if language else None,
														
 
															+                                beam_size=beam_size,
														
 
															+                                initial_prompt=initial_prompt,
														
 
															+                                suppress_tokens=suppress_tokens
														
 
															+                            )
														
 
															+                            transcription = " ".join(seg.text for seg in segments)
														
 
															+                            transcription = transcription.strip()
														
 
															+                            logging.debug(f"Final text detected with main model: {transcription}")
														
 
															+                            conn.send(('success', (transcription, info)))
														
 
															+                        except Exception as e:
														
 
															+                            logging.error(f"General error in _transcription_worker in transcription: {e}")
														
 
															+                            conn.send(('error', str(e)))
														
 
															+                    else:
														
 
															+                        time.sleep(TIME_SLEEP)
														
 
															-            except KeyboardInterrupt:
														
 
															-                interrupt_stop_event.set()
														
 
															-                logging.debug("Transcription worker process "
														
 
															-                              "finished due to KeyboardInterrupt"
														
 
															-                              )
														
 
															-                break
														
 
															+
														
 
															+
														
 
															+                except KeyboardInterrupt:
														
 
															+                    interrupt_stop_event.set()
														
 
															+                    
														
 
															+                    logging.debug("Transcription worker process "
														
 
															+                                    "finished due to KeyboardInterrupt"
														
 
															+                                    )
														
 
															+                    stdout_pipe.close()
														
 
															+                    break
														
 
															+
														
 
															+                except Exception as e:
														
 
															+                    logging.error(f"General error in _transcription_worker in accessing pipe: {e}")
														
 
															+
														
 
															+        finally:
														
 
															+            __builtins__['print'] = print  # Restore the original print function            
														
 
															+            conn.close()
														
 
															+            stdout_pipe.close()
														
 
															     @staticmethod
														
 
															     def _audio_data_worker(audio_queue,
														
@@ -916,33 +963,47 @@ class AudioToTextRecorder:
 
															         device_sample_rate = None
														
 
															         chunk_size = 1024  # Increased chunk size for better performance
														
 
															-        try:
														
 
															-            audio_interface = pyaudio.PyAudio()
														
 
															-            if input_device_index is None:
														
 
															-                try:
														
 
															-                    default_device = audio_interface.get_default_input_device_info()
														
 
															-                    input_device_index = default_device['index']
														
 
															-                except OSError as e:
														
 
															-                    input_device_index = None
														
 
															-
														
 
															-
														
 
															-            if input_device_index is not None:
														
 
															-                device_sample_rate = get_highest_sample_rate(audio_interface, input_device_index)
														
 
															-            else:
														
 
															-                device_sample_rate = 16000  # better: try 16000, 48000, ... until it works
														
 
															+        def setup_audio():  
														
 
															+            nonlocal audio_interface, stream, device_sample_rate, input_device_index
														
 
															+            try:
														
 
															+                audio_interface = pyaudio.PyAudio()
														
 
															+                if input_device_index is None:
														
 
															+                    try:
														
 
															+                        default_device = audio_interface.get_default_input_device_info()
														
 
															+                        input_device_index = default_device['index']
														
 
															+                    except OSError as e:
														
 
															+                        input_device_index = None
														
 
															+
														
 
															+                sample_rates_to_try = [16000]  # Try 16000 Hz first
														
 
															+                if input_device_index is not None:
														
 
															+                    highest_rate = get_highest_sample_rate(audio_interface, input_device_index)
														
 
															+                    if highest_rate != 16000:
														
 
															+                        sample_rates_to_try.append(highest_rate)
														
 
															+                else:
														
 
															+                    sample_rates_to_try.append(48000)  # Fallback sample rate
														
 
															-            stream = initialize_audio_stream(audio_interface, input_device_index, device_sample_rate, chunk_size)
														
 
															+                for rate in sample_rates_to_try:
														
 
															+                    try:
														
 
															+                        device_sample_rate = rate
														
 
															+                        stream = initialize_audio_stream(audio_interface, input_device_index, device_sample_rate, chunk_size)
														
 
															+                        if stream is not None:
														
 
															+                            logging.debug(f"Audio recording initialized successfully at {device_sample_rate} Hz, reading {chunk_size} frames at a time")
														
 
															+                            return True
														
 
															+                    except Exception as e:
														
 
															+                        logging.warning(f"Failed to initialize audio stream at {device_sample_rate} Hz: {e}")
														
 
															+                        continue
														
 
															-            if stream is None:
														
 
															-                raise Exception("Failed to initialize audio stream.")
														
 
															+                # If we reach here, none of the sample rates worked
														
 
															+                raise Exception("Failed to initialize audio stream with all sample rates.")
														
 
															-        except Exception as e:
														
 
															-            logging.exception(f"Error initializing pyaudio audio recording: {e}")
														
 
															-            if audio_interface:
														
 
															-                audio_interface.terminate()
														
 
															-            raise
														
 
															+            except Exception as e:
														
 
															+                logging.exception(f"Error initializing pyaudio audio recording: {e}")
														
 
															+                if audio_interface:
														
 
															+                    audio_interface.terminate()
														
 
															+                return False
														
 
															-        logging.debug(f"Audio recording initialized successfully at {device_sample_rate} Hz, reading {chunk_size} frames at a time")
														
 
															+        if not setup_audio():
														
 
															+            raise Exception("Failed to set up audio recording.")
														
 
															         buffer = bytearray()
														
 
															         silero_buffer_size = 2 * buffer_size  # silero complains if too short
														
@@ -950,7 +1011,7 @@ class AudioToTextRecorder:
 
															         try:
														
 
															             while not shutdown_event.is_set():
														
 
															                 try:
														
 
															-                    data = stream.read(chunk_size)
														
 
															+                    data = stream.read(chunk_size, exception_on_overflow=False)
														
 
															                     if use_microphone.value:
														
 
															                         processed_data = preprocess_audio(data, device_sample_rate, target_sample_rate)
														
@@ -970,13 +1031,45 @@ class AudioToTextRecorder:
 
															                         logging.warning("Input overflowed. Frame dropped.")
														
 
															                     else:
														
 
															                         logging.error(f"Error during recording: {e}")
														
 
															+                        # Attempt to reinitialize the stream
														
 
															+                        logging.info("Attempting to reinitialize the audio stream...")
														
 
															+                        if stream:
														
 
															+                            stream.stop_stream()
														
 
															+                            stream.close()
														
 
															+                        if audio_interface:
														
 
															+                            audio_interface.terminate()
														
 
															+                        
														
 
															+                        # Wait a bit before trying to reinitialize
														
 
															+                        time.sleep(1)
														
 
															+                        
														
 
															+                        if not setup_audio():
														
 
															+                            logging.error("Failed to reinitialize audio stream. Exiting.")
														
 
															+                            break
														
 
															+                        else:
														
 
															+                            logging.info("Audio stream reinitialized successfully.")
														
 
															                     continue
														
 
															                 except Exception as e:
														
 
															                     logging.error(f"Error during recording: {e}")
														
 
															                     tb_str = traceback.format_exc()
														
 
															-                    print(f"Traceback: {tb_str}")
														
 
															-                    print(f"Error: {e}")
														
 
															+                    logging.error(f"Traceback: {tb_str}")
														
 
															+                    logging.error(f"Error: {e}")
														
 
															+                    # Attempt to reinitialize the stream
														
 
															+                    logging.info("Attempting to reinitialize the audio stream...")
														
 
															+                    if stream:
														
 
															+                        stream.stop_stream()
														
 
															+                        stream.close()
														
 
															+                    if audio_interface:
														
 
															+                        audio_interface.terminate()
														
 
															+                    
														
 
															+                    # Wait a bit before trying to reinitialize
														
 
															+                    time.sleep(0.5)
														
 
															+                    
														
 
															+                    if not setup_audio():
														
 
															+                        logging.error("Failed to reinitialize audio stream. Exiting.")
														
 
															+                        break
														
 
															+                    else:
														
 
															+                        logging.info("Audio stream reinitialized successfully.")
														
 
															                     continue
														
 
															         except KeyboardInterrupt:
														
@@ -993,6 +1086,83 @@ class AudioToTextRecorder:
 
															             if audio_interface:
														
 
															                 audio_interface.terminate()
														
 
															+        # try:
														
 
															+        #     audio_interface = pyaudio.PyAudio()
														
 
															+        #     if input_device_index is None:
														
 
															+        #         try:
														
 
															+        #             default_device = audio_interface.get_default_input_device_info()
														
 
															+        #             input_device_index = default_device['index']
														
 
															+        #         except OSError as e:
														
 
															+        #             input_device_index = None
														
 
															+
														
 
															+
														
 
															+        #     if input_device_index is not None:
														
 
															+        #         device_sample_rate = get_highest_sample_rate(audio_interface, input_device_index)
														
 
															+        #     else:
														
 
															+        #         device_sample_rate = 16000  # better: try 16000, 48000, ... until it works
														
 
															+
														
 
															+        #     stream = initialize_audio_stream(audio_interface, input_device_index, device_sample_rate, chunk_size)
														
 
															+
														
 
															+        #     if stream is None:
														
 
															+        #         raise Exception("Failed to initialize audio stream.")
														
 
															+
														
 
															+        # except Exception as e:
														
 
															+        #     logging.exception(f"Error initializing pyaudio audio recording: {e}")
														
 
															+        #     if audio_interface:
														
 
															+        #         audio_interface.terminate()
														
 
															+        #     raise
														
 
															+
														
 
															+        # logging.debug(f"Audio recording initialized successfully at {device_sample_rate} Hz, reading {chunk_size} frames at a time")
														
 
															+
														
 
															+        # buffer = bytearray()
														
 
															+        # silero_buffer_size = 2 * buffer_size  # silero complains if too short
														
 
															+
														
 
															+        # try:
														
 
															+        #     while not shutdown_event.is_set():
														
 
															+        #         try:
														
 
															+        #             data = stream.read(chunk_size)
														
 
															+                    
														
 
															+        #             if use_microphone.value:
														
 
															+        #                 processed_data = preprocess_audio(data, device_sample_rate, target_sample_rate)
														
 
															+        #                 buffer += processed_data
														
 
															+
														
 
															+        #                 # Check if the buffer has reached or exceeded the silero_buffer_size
														
 
															+        #                 while len(buffer) >= silero_buffer_size:
														
 
															+        #                     # Extract silero_buffer_size amount of data from the buffer
														
 
															+        #                     to_process = buffer[:silero_buffer_size]
														
 
															+        #                     buffer = buffer[silero_buffer_size:]
														
 
															+
														
 
															+        #                     # Feed the extracted data to the audio_queue
														
 
															+        #                     audio_queue.put(to_process)
														
 
															+
														
 
															+        #         except OSError as e:
														
 
															+        #             if e.errno == pyaudio.paInputOverflowed:
														
 
															+        #                 logging.warning("Input overflowed. Frame dropped.")
														
 
															+        #             else:
														
 
															+        #                 logging.error(f"Error during recording: {e}")
														
 
															+        #             continue
														
 
															+
														
 
															+        #         except Exception as e:
														
 
															+        #             logging.error(f"Error during recording: {e}")
														
 
															+        #             tb_str = traceback.format_exc()
														
 
															+        #             print(f"Traceback: {tb_str}")
														
 
															+        #             print(f"Error: {e}")
														
 
															+        #             continue
														
 
															+
														
 
															+        # except KeyboardInterrupt:
														
 
															+        #     interrupt_stop_event.set()
														
 
															+        #     logging.debug("Audio data worker process finished due to KeyboardInterrupt")
														
 
															+        # finally:
														
 
															+        #     # After recording stops, feed any remaining audio data
														
 
															+        #     if buffer:
														
 
															+        #         audio_queue.put(bytes(buffer))
														
 
															+            
														
 
															+        #     if stream:
														
 
															+        #         stream.stop_stream()
														
 
															+        #         stream.close()
														
 
															+        #     if audio_interface:
														
 
															+        #         audio_interface.terminate()
														
 
															+
														
 
															     def wakeup(self):
														
 
															         """
														
 
															         If in wake work modus, wake up as if a wake word was spoken.
														
@@ -1022,7 +1192,9 @@ class AudioToTextRecorder:
 
															         - Modifies the audio attribute to contain the processed audio data.
														
 
															         """
														
 
															-        self.listen_start = time.time()
														
 
															+        logging.info("Setting listen time")
														
 
															+        if self.listen_start == 0:
														
 
															+            self.listen_start = time.time()
														
 
															         # If not yet started recording, wait for voice activity to initiate.
														
 
															         if not self.is_recording and not self.frames:
														
@@ -1087,8 +1259,15 @@ class AudioToTextRecorder:
 
															         start_time = time.time()  # Start timing
														
 
															         with self.transcription_lock:
														
 
															             try:
														
 
															-                self.parent_transcription_pipe.send((self.audio, self.language))
														
 
															-                status, result = self.parent_transcription_pipe.recv()
														
 
															+                if self.transcribe_count == 0:
														
 
															+                    self.parent_transcription_pipe.send((self.audio, self.language))
														
 
															+                    self.transcribe_count += 1
														
 
															+
														
 
															+                while self.transcribe_count > 0:
														
 
															+                    logging.debug("Receive from parent_transcription_pipe pipe after sendiung transcription request")
														
 
															+                    status, result = self.parent_transcription_pipe.recv()
														
 
															+                    self.transcribe_count -= 1
														
 
															+
														
 
															                 self._set_state("inactive")
														
 
															                 if status == 'success':
														
 
															                     segments, info = result
														
@@ -1098,7 +1277,9 @@ class AudioToTextRecorder:
 
															                     transcription = self._preprocess_output(segments)
														
 
															                     end_time = time.time()  # End timing
														
 
															                     transcription_time = end_time - start_time
														
 
															-                    # print(f"Model {self.main_model_type} completed transcription in {transcription_time:.2f} seconds")
														
 
															+
														
 
															+                    if self.log_transcription_time:
														
 
															+                        logging.info(f"Model {self.main_model_type} completed transcription in {transcription_time:.2f} seconds")
														
 
															                     return transcription
														
 
															                 else:
														
 
															                     logging.error(f"Transcription error: {result}")
														
@@ -1118,7 +1299,7 @@ class AudioToTextRecorder:
 
															             )
														
 
															             porcupine_index = self.porcupine.process(pcm)
														
 
															             if self.debug_mode:
														
 
															-                print (f"wake words porcupine_index: {porcupine_index}")
														
 
															+                logging.info(f"wake words porcupine_index: {porcupine_index}")
														
 
															             return self.porcupine.process(pcm)
														
 
															         elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
														
@@ -1135,15 +1316,16 @@ class AudioToTextRecorder:
 
															                         max_score = scores[-1]
														
 
															                         max_index = idx
														
 
															                 if self.debug_mode:
														
 
															-                    print (f"wake words oww max_index, max_score: {max_index} {max_score}")
														
 
															+                    logging.info(f"wake words oww max_index, max_score: {max_index} {max_score}")
														
 
															                 return max_index  
														
 
															             else:
														
 
															                 if self.debug_mode:
														
 
															-                    print (f"wake words oww_index: -1")
														
 
															+                    logging.info(f"wake words oww_index: -1")
														
 
															                 return -1
														
 
															         if self.debug_mode:        
														
 
															-            print("wake words no match")
														
 
															+            logging.info("wake words no match")
														
 
															+
														
 
															         return -1
														
 
															     def text(self,
														
@@ -1392,11 +1574,12 @@ class AudioToTextRecorder:
 
															                             data = self.audio_queue.get()
														
 
															                 except BrokenPipeError:
														
 
															-                    print("BrokenPipeError _recording_worker")
														
 
															+                    logging.error("BrokenPipeError _recording_worker")
														
 
															                     self.is_running = False
														
 
															                     break
														
 
															                 if not self.is_recording:
														
 
															+                    logging.info(f"not recording, state: {self.state}, self.recording_stop_time: {self.recording_stop_time}, self.listen_start: {self.listen_start}")
														
 
															                     # Handle not recording state
														
 
															                     time_since_listen_start = (time.time() - self.listen_start
														
 
															                                                if self.listen_start else 0)
														
@@ -1506,6 +1689,11 @@ class AudioToTextRecorder:
 
															                             # measuring silence time before stopping recording
														
 
															                             if self.speech_end_silence_start == 0:
														
 
															                                 self.speech_end_silence_start = time.time()
														
 
															+                                # if(len(self.frames) > 0):
														
 
															+                                #     audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
														
 
															+                                #     audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
														
 
															+                                #     self.parent_transcription_pipe.send((audio, self.language))
														
 
															+                                #     self.transcribe_count += 1                                
														
 
															                         else:
														
 
															                             self.speech_end_silence_start = 0
														
@@ -1515,7 +1703,16 @@ class AudioToTextRecorder:
 
															                                 self.post_speech_silence_duration:
														
 
															                             logging.info("voice deactivity detected")
														
 
															                             self.frames.append(data)
														
 
															+                            logging.info("stopping recording")
														
 
															                             self.stop()
														
 
															+                            logging.info("stopped recording")
														
 
															+
														
 
															+
														
 
															+                            ####
														
 
															+                            if not self.use_wake_words:
														
 
															+                                self.listen_start = time.time()
														
 
															+                                self._set_state("listening")
														
 
															+                                self.start_recording_on_voice_activity = True    
														
 
															                 if not self.is_recording and was_recording:
														
 
															                     # Reset after stopping recording to ensure clean state
														
@@ -1583,6 +1780,8 @@ class AudioToTextRecorder:
 
															                         dtype=np.int16
														
 
															                         )
														
 
															+                    logging.debug(f"Current realtime buffer size: {len(audio_array)}")
														
 
															+
														
 
															                     # Normalize the array to a [-1, 1] range
														
 
															                     audio_array = audio_array.astype(np.float32) / \
														
 
															                         INT16_MAX_ABS_VALUE
														
@@ -1592,12 +1791,14 @@ class AudioToTextRecorder:
 
															                             try:
														
 
															                                 self.parent_transcription_pipe.send((audio_array, self.language))
														
 
															                                 if self.parent_transcription_pipe.poll(timeout=5):  # Wait for 5 seconds
														
 
															+                                    logging.debug("Receive from realtime worker after transcription request to main model")
														
 
															                                     status, result = self.parent_transcription_pipe.recv()
														
 
															                                     if status == 'success':
														
 
															                                         segments, info = result
														
 
															                                         self.detected_realtime_language = info.language if info.language_probability > 0 else None
														
 
															                                         self.detected_realtime_language_probability = info.language_probability
														
 
															                                         realtime_text = segments
														
 
															+                                        logging.debug(f"Realtime text detected with main model: {realtime_text}")
														
 
															                                     else:
														
 
															                                         logging.error(f"Realtime transcription error: {result}")
														
 
															                                         continue
														
@@ -1622,6 +1823,7 @@ class AudioToTextRecorder:
 
															                         realtime_text = " ".join(
														
 
															                             seg.text for seg in segments
														
 
															                         )
														
 
															+                        logging.debug(f"Realtime text detected: {realtime_text}")
														
 
															                     # double check recording state
														
 
															                     # because it could have changed mid-transcription
														
@@ -1764,19 +1966,19 @@ class AudioToTextRecorder:
 
															                 speech_frames += 1
														
 
															                 if not all_frames_must_be_true:
														
 
															                     if self.debug_mode:
														
 
															-                        print(f"Speech detected in frame {i + 1}"
														
 
															+                        logging.info(f"Speech detected in frame {i + 1}"
														
 
															                               f" of {num_frames}")
														
 
															                     return True
														
 
															         if all_frames_must_be_true:
														
 
															             if self.debug_mode and speech_frames == num_frames:
														
 
															-                print(f"Speech detected in {speech_frames} of "
														
 
															+                logging.info(f"Speech detected in {speech_frames} of "
														
 
															                       f"{num_frames} frames")
														
 
															             elif self.debug_mode:
														
 
															-                print(f"Speech not detected in all {num_frames} frames")
														
 
															+                logging.info(f"Speech not detected in all {num_frames} frames")
														
 
															             return speech_frames == num_frames
														
 
															         else:
														
 
															             if self.debug_mode:
														
 
															-                print(f"Speech not detected in any of {num_frames} frames")
														
 
															+                logging.info(f"Speech not detected in any of {num_frames} frames")
														
 
															             return False
														
 
															     def _check_voice_activity(self, data):
														
@@ -1841,6 +2043,9 @@ class AudioToTextRecorder:
 
															         # Update to the new state
														
 
															         self.state = new_state
														
 
															+        # Log the state change
														
 
															+        logging.info(f"State changed from '{old_state}' to '{new_state}'")
														
 
															+
														
 
															         # Execute callbacks based on transitioning FROM a particular state
														
 
															         if old_state == "listening":
														
 
															             if self.on_vad_detect_stop: