há 1 ano atrás · cee57e30eb
--- a/README.md
+++ b/README.md
@@ -16,6 +16,11 @@ https://github.com/KoljaB/RealtimeSTT/assets/7604638/207cb9a2-4482-48e7-9d2b-072
 
				 
			
 
				 ### Updates
			
 
				 
			
 
				+#### v0.1.7
			
 
				+- Bugfix for macOS installation (multiprocessing / queue.qsize())
			
 
				+- KeyboardInterrupt handling (now abortable with CTRL+C)
			
 
				+- Bugfix for spinner handling (could lead to an exception in some cases)
			
 
				+
			
 
				 #### v0.1.6
			
 
				 - Implements context manager protocol (recorder can be used in a `with` statement)
			
 
				 - Bugfix for resource management in shutdown method
			
--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
@@ -229,13 +229,14 @@ class AudioToTextRecorder:
 
				         logging.info(f"Starting RealTimeSTT")
			
 
				 
			
 
				         # Start transcription process
			
 
				+        self.interrupt_stop_event = Event()
			
 
				         self.main_transcription_ready_event = Event()
			
 
				         self.parent_transcription_pipe, child_transcription_pipe = Pipe()
			
 
				-        self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event))
			
 
				+        self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event, self.interrupt_stop_event))
			
 
				         self.transcript_process.start()
			
 
				 
			
 
				         # Start audio data reading process
			
 
				-        self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event))
			
 
				+        self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event, self.interrupt_stop_event))
			
 
				         self.reader_process.start()
			
 
				 
			
 
				         # Initialize the realtime transcription model
			
@@ -326,7 +327,7 @@ class AudioToTextRecorder:
 
				 
			
 
				 
			
 
				     @staticmethod
			
 
				-    def _transcription_worker(conn, model_path, ready_event, shutdown_event):
			
 
				+    def _transcription_worker(conn, model_path, ready_event, shutdown_event, interrupt_stop_event):
			
 
				         """
			
 
				         Worker method that handles the continuous process of transcribing audio data.
			
 
				 
			
@@ -363,25 +364,30 @@ class AudioToTextRecorder:
 
				         logging.debug('Faster_whisper main speech to text transcription model initialized successfully')
			
 
				 
			
 
				         while not shutdown_event.is_set():
			
 
				-            if conn.poll(0.5):
			
 
				-                audio, language = conn.recv()
			
 
				-                try:
			
 
				-                    segments = model.transcribe(audio, language=language if language else None)[0]
			
 
				-                    transcription = " ".join(seg.text for seg in segments).strip()
			
 
				-                    conn.send(('success', transcription))
			
 
				-                except faster_whisper.WhisperError as e:
			
 
				-                    logging.error(f"Whisper transcription error: {e}")
			
 
				-                    conn.send(('error', str(e)))      
			
 
				-                except Exception as e:
			
 
				-                    logging.error(f"General transcription error: {e}")
			
 
				-                    conn.send(('error', str(e)))
			
 
				-            else:
			
 
				-                # If there's no data, sleep for a short while to prevent busy waiting
			
 
				-                time.sleep(0.02)
			
 
				+            try:
			
 
				+                if conn.poll(0.5):
			
 
				+                    audio, language = conn.recv()
			
 
				+                    try:
			
 
				+                        segments = model.transcribe(audio, language=language if language else None)[0]
			
 
				+                        transcription = " ".join(seg.text for seg in segments).strip()
			
 
				+                        conn.send(('success', transcription))
			
 
				+                    except faster_whisper.WhisperError as e:
			
 
				+                        logging.error(f"Whisper transcription error: {e}")
			
 
				+                        conn.send(('error', str(e)))      
			
 
				+                    except Exception as e:
			
 
				+                        logging.error(f"General transcription error: {e}")
			
 
				+                        conn.send(('error', str(e)))
			
 
				+                else:
			
 
				+                    # If there's no data, sleep for a short while to prevent busy waiting
			
 
				+                    time.sleep(0.02)
			
 
				+            except KeyboardInterrupt:
			
 
				+                interrupt_stop_event.set()
			
 
				+                logging.debug('Transcription worker process finished due to KeyboardInterrupt')
			
 
				+                break
			
 
				 
			
 
				 
			
 
				     @staticmethod
			
 
				-    def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event):
			
 
				+    def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event, interrupt_stop_event):
			
 
				         """
			
 
				         Worker method that handles the audio recording process.
			
 
				 
			
@@ -434,7 +440,11 @@ class AudioToTextRecorder:
 
				                     print (f"Error: {e}")
			
 
				                     continue
			
 
				 
			
 
				-                audio_queue.put(data)                
			
 
				+                audio_queue.put(data)
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            interrupt_stop_event.set()
			
 
				+            logging.debug('Audio data worker process finished due to KeyboardInterrupt')
			
 
				         finally:
			
 
				             stream.stop_stream()
			
 
				             stream.close()
			
@@ -464,14 +474,16 @@ class AudioToTextRecorder:
 
				             self.start_recording_on_voice_activity = True
			
 
				 
			
 
				             # Wait until recording starts
			
 
				-            self.start_recording_event.wait()
			
 
				+            while not self.interrupt_stop_event.is_set():
			
 
				+                if (self.start_recording_event.wait(timeout=0.5)): break
			
 
				 
			
 
				         # If recording is ongoing, wait for voice inactivity to finish recording.
			
 
				         if self.is_recording:
			
 
				             self.stop_recording_on_voice_deactivity = True
			
 
				 
			
 
				             # Wait until recording stops
			
 
				-            self.stop_recording_event.wait()
			
 
				+            while not self.interrupt_stop_event.is_set():
			
 
				+                if (self.stop_recording_event.wait(timeout=0.5)): break
			
 
				 
			
 
				         # Convert recorded frames to the appropriate audio format.
			
 
				         audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
			
@@ -537,7 +549,7 @@ class AudioToTextRecorder:
 
				 
			
 
				         self.wait_audio()
			
 
				 
			
 
				-        if self.is_shut_down:
			
 
				+        if self.is_shut_down or self.interrupt_stop_event.is_set():
			
 
				             return ""
			
 
				 
			
 
				         if on_transcription_finished:
			
@@ -661,16 +673,23 @@ class AudioToTextRecorder:
 
				             # Continuously monitor audio for voice activity
			
 
				             while self.is_running:
			
 
				 
			
 
				-                data = self.audio_queue.get()
			
 
				+                try:
			
 
				 
			
 
				-                # Handle queue overflow
			
 
				-                queue_overflow_logged = False
			
 
				-                while self.audio_queue.qsize() > self.allowed_latency_limit:
			
 
				-                    if not queue_overflow_logged:
			
 
				-                        logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
			
 
				-                        queue_overflow_logged = True
			
 
				                     data = self.audio_queue.get()
			
 
				 
			
 
				+                    # Handle queue overflow
			
 
				+                    queue_overflow_logged = False
			
 
				+                    while self.audio_queue.qsize() > self.allowed_latency_limit:
			
 
				+                        if not queue_overflow_logged:
			
 
				+                            logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
			
 
				+                            queue_overflow_logged = True
			
 
				+                        data = self.audio_queue.get()
			
 
				+
			
 
				+                except BrokenPipeError:
			
 
				+                    print ("BrokenPipeError _recording_worker")
			
 
				+                    self.is_running = False
			
 
				+                    break
			
 
				+
			
 
				                 if not self.is_recording:
			
 
				                     # Handle not recording state
			
 
				 
			
@@ -791,8 +810,9 @@ class AudioToTextRecorder:
 
				 
			
 
				 
			
 
				         except Exception as e:
			
 
				-            logging.error(f"Unhandled exeption in _recording_worker: {e}")
			
 
				-            raise
			
 
				+            if not self.interrupt_stop_event.is_set():
			
 
				+                logging.error(f"Unhandled exeption in _recording_worker: {e}")
			
 
				+                raise
			
 
				 
			
 
				 
			
 
				     def _realtime_worker(self):
			
@@ -989,23 +1009,23 @@ class AudioToTextRecorder:
 
				             if self.on_vad_detect_start:
			
 
				                 self.on_vad_detect_start()
			
 
				             self._set_spinner("speak now")
			
 
				-            if self.spinner:
			
 
				+            if self.spinner and self.halo:
			
 
				                 self.halo._interval = 250
			
 
				         elif new_state == "wakeword":
			
 
				             if self.on_wakeword_detection_start:
			
 
				                 self.on_wakeword_detection_start()
			
 
				             self._set_spinner(f"say {self.wake_words}")
			
 
				-            if self.spinner:
			
 
				+            if self.spinner and self.halo:
			
 
				                 self.halo._interval = 500
			
 
				         elif new_state == "transcribing":
			
 
				             if self.on_transcription_start:
			
 
				                 self.on_transcription_start()
			
 
				             self._set_spinner("transcribing")
			
 
				-            if self.spinner:
			
 
				+            if self.spinner and self.halo:
			
 
				                 self.halo._interval = 50
			
 
				         elif new_state == "recording":
			
 
				             self._set_spinner("recording")
			
 
				-            if self.spinner:
			
 
				+            if self.spinner and self.halo:
			
 
				                 self.halo._interval = 100
			
 
				         elif new_state == "inactive":
			
 
				             if self.spinner and self.halo:
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,5 +3,5 @@ faster-whisper>=0.7.1
 
				 pvporcupine==1.9.5
			
 
				 webrtcvad>=2.0.10
			
 
				 halo>=0.0.31
			
 
				-torch==2.0.1
			
 
				-torchaudio==2.0.2
			
 
				+torch>=2.0.1
			
 
				+torchaudio>=2.0.2
			
--- a/tests/simple_test.py
+++ b/tests/simple_test.py
@@ -1,6 +1,6 @@
 
				 from RealtimeSTT import AudioToTextRecorder
			
 
				 if __name__ == '__main__':
			
 
				-    recorder = AudioToTextRecorder(spinner=False)
			
 
				+    recorder = AudioToTextRecorder(spinner=False, model="tiny.en", language="en")
			
 
				 
			
 
				     print("Say something...")
			
 
				     while (True): print(recorder.text(), end=" ", flush=True)