Ver Fonte

some Bugfixes

Kolja Beigel há 1 ano atrás
pai
commit
cee57e30eb
4 ficheiros alterados com 64 adições e 39 exclusões
  1. 5 0
      README.md
  2. 56 36
      RealtimeSTT/audio_recorder.py
  3. 2 2
      requirements.txt
  4. 1 1
      tests/simple_test.py

+ 5 - 0
README.md

@@ -16,6 +16,11 @@ https://github.com/KoljaB/RealtimeSTT/assets/7604638/207cb9a2-4482-48e7-9d2b-072
 
 ### Updates
 
+#### v0.1.7
+- Bugfix for macOS installation (multiprocessing / queue.qsize())
+- KeyboardInterrupt handling (now abortable with CTRL+C)
+- Bugfix for spinner handling (could lead to exception in some cases)
+
 #### v0.1.6
 - Implements context manager protocol (recorder can be used in a `with` statement)
 - Bugfix for resource management in shutdown method

+ 56 - 36
RealtimeSTT/audio_recorder.py

@@ -229,13 +229,14 @@ class AudioToTextRecorder:
         logging.info(f"Starting RealTimeSTT")
 
         # Start transcription process
+        self.interrupt_stop_event = Event()
         self.main_transcription_ready_event = Event()
         self.parent_transcription_pipe, child_transcription_pipe = Pipe()
-        self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event))
+        self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event, self.interrupt_stop_event))
         self.transcript_process.start()
 
         # Start audio data reading process
-        self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event))
+        self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event, self.interrupt_stop_event))
         self.reader_process.start()
 
         # Initialize the realtime transcription model
@@ -326,7 +327,7 @@ class AudioToTextRecorder:
 
 
     @staticmethod
-    def _transcription_worker(conn, model_path, ready_event, shutdown_event):
+    def _transcription_worker(conn, model_path, ready_event, shutdown_event, interrupt_stop_event):
         """
         Worker method that handles the continuous process of transcribing audio data.
 
@@ -363,25 +364,30 @@ class AudioToTextRecorder:
         logging.debug('Faster_whisper main speech to text transcription model initialized successfully')
 
         while not shutdown_event.is_set():
-            if conn.poll(0.5):
-                audio, language = conn.recv()
-                try:
-                    segments = model.transcribe(audio, language=language if language else None)[0]
-                    transcription = " ".join(seg.text for seg in segments).strip()
-                    conn.send(('success', transcription))
-                except faster_whisper.WhisperError as e:
-                    logging.error(f"Whisper transcription error: {e}")
-                    conn.send(('error', str(e)))      
-                except Exception as e:
-                    logging.error(f"General transcription error: {e}")
-                    conn.send(('error', str(e)))
-            else:
-                # If there's no data, sleep for a short while to prevent busy waiting
-                time.sleep(0.02)
+            try:
+                if conn.poll(0.5):
+                    audio, language = conn.recv()
+                    try:
+                        segments = model.transcribe(audio, language=language if language else None)[0]
+                        transcription = " ".join(seg.text for seg in segments).strip()
+                        conn.send(('success', transcription))
+                    except faster_whisper.WhisperError as e:
+                        logging.error(f"Whisper transcription error: {e}")
+                        conn.send(('error', str(e)))      
+                    except Exception as e:
+                        logging.error(f"General transcription error: {e}")
+                        conn.send(('error', str(e)))
+                else:
+                    # If there's no data, sleep for a short while to prevent busy waiting
+                    time.sleep(0.02)
+            except KeyboardInterrupt:
+                interrupt_stop_event.set()
+                logging.debug('Transcription worker process finished due to KeyboardInterrupt')
+                break
 
 
     @staticmethod
-    def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event):
+    def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event, interrupt_stop_event):
         """
         Worker method that handles the audio recording process.
 
@@ -434,7 +440,11 @@ class AudioToTextRecorder:
                     print (f"Error: {e}")
                     continue
 
-                audio_queue.put(data)                
+                audio_queue.put(data)
+
+        except KeyboardInterrupt:
+            interrupt_stop_event.set()
+            logging.debug('Audio data worker process finished due to KeyboardInterrupt')
         finally:
             stream.stop_stream()
             stream.close()
@@ -464,14 +474,16 @@ class AudioToTextRecorder:
             self.start_recording_on_voice_activity = True
 
             # Wait until recording starts
-            self.start_recording_event.wait()
+            while not self.interrupt_stop_event.is_set():
+                if (self.start_recording_event.wait(timeout=0.5)): break
 
         # If recording is ongoing, wait for voice inactivity to finish recording.
         if self.is_recording:
             self.stop_recording_on_voice_deactivity = True
 
             # Wait until recording stops
-            self.stop_recording_event.wait()
+            while not self.interrupt_stop_event.is_set():
+                if (self.stop_recording_event.wait(timeout=0.5)): break
 
         # Convert recorded frames to the appropriate audio format.
         audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
@@ -537,7 +549,7 @@ class AudioToTextRecorder:
 
         self.wait_audio()
 
-        if self.is_shut_down:
+        if self.is_shut_down or self.interrupt_stop_event.is_set():
             return ""
 
         if on_transcription_finished:
@@ -661,16 +673,23 @@ class AudioToTextRecorder:
             # Continuously monitor audio for voice activity
             while self.is_running:
 
-                data = self.audio_queue.get()
+                try:
 
-                # Handle queue overflow
-                queue_overflow_logged = False
-                while self.audio_queue.qsize() > self.allowed_latency_limit:
-                    if not queue_overflow_logged:
-                        logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
-                        queue_overflow_logged = True
                     data = self.audio_queue.get()
 
+                    # Handle queue overflow
+                    queue_overflow_logged = False
+                    while self.audio_queue.qsize() > self.allowed_latency_limit:
+                        if not queue_overflow_logged:
+                            logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
+                            queue_overflow_logged = True
+                        data = self.audio_queue.get()
+
+                except BrokenPipeError:
+                    print ("BrokenPipeError _recording_worker")
+                    self.is_running = False
+                    break
+
                 if not self.is_recording:
                     # Handle not recording state
 
@@ -791,8 +810,9 @@ class AudioToTextRecorder:
 
 
         except Exception as e:
-            logging.error(f"Unhandled exeption in _recording_worker: {e}")
-            raise
+            if not self.interrupt_stop_event.is_set():
+                logging.error(f"Unhandled exeption in _recording_worker: {e}")
+                raise
 
 
     def _realtime_worker(self):
@@ -989,23 +1009,23 @@ class AudioToTextRecorder:
             if self.on_vad_detect_start:
                 self.on_vad_detect_start()
             self._set_spinner("speak now")
-            if self.spinner:
+            if self.spinner and self.halo:
                 self.halo._interval = 250
         elif new_state == "wakeword":
             if self.on_wakeword_detection_start:
                 self.on_wakeword_detection_start()
             self._set_spinner(f"say {self.wake_words}")
-            if self.spinner:
+            if self.spinner and self.halo:
                 self.halo._interval = 500
         elif new_state == "transcribing":
             if self.on_transcription_start:
                 self.on_transcription_start()
             self._set_spinner("transcribing")
-            if self.spinner:
+            if self.spinner and self.halo:
                 self.halo._interval = 50
         elif new_state == "recording":
             self._set_spinner("recording")
-            if self.spinner:
+            if self.spinner and self.halo:
                 self.halo._interval = 100
         elif new_state == "inactive":
             if self.spinner and self.halo:

+ 2 - 2
requirements.txt

@@ -3,5 +3,5 @@ faster-whisper>=0.7.1
 pvporcupine==1.9.5
 webrtcvad>=2.0.10
 halo>=0.0.31
-torch==2.0.1
-torchaudio==2.0.2
+torch>=2.0.1
+torchaudio>=2.0.2

+ 1 - 1
tests/simple_test.py

@@ -1,6 +1,6 @@
 from RealtimeSTT import AudioToTextRecorder
 if __name__ == '__main__':
-    recorder = AudioToTextRecorder(spinner=False)
+    recorder = AudioToTextRecorder(spinner=False, model="tiny.en", language="en")
 
     print("Say something...")
     while (True): print(recorder.text(), end=" ", flush=True)