Explorar el Código

feat: reduce realtime transcription load by only transcribing if new frames arrive

oddlama hace 10 meses
padre
commit
e9f4fe641b
Se ha modificado 1 fichero con 8 adiciones y 0 borrados
  1. 8 0
      RealtimeSTT/audio_recorder.py

+ 8 - 0
RealtimeSTT/audio_recorder.py

@@ -537,6 +537,8 @@ class AudioToTextRecorder:
                        self.pre_recording_buffer_duration)
         )
         self.frames = []
+        self.new_frames = mp.Event()
+        self.new_frames.set()
 
         # Recording control flags
         self.is_recording = False
@@ -807,6 +809,7 @@ class AudioToTextRecorder:
         audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
         self.audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
         self.frames.clear()
+        self.new_frames.set()
 
         # Reset recording-related timestamps
         self.recording_stop_time = 0
@@ -915,6 +918,7 @@ class AudioToTextRecorder:
         self.wakeword_detected = False
         self.wake_word_detect_time = 0
         self.frames = []
+        self.new_frames.set()
         self.is_recording = True
         self.recording_start_time = time.time()
         self.is_silero_speech_active = False
@@ -1185,6 +1189,7 @@ class AudioToTextRecorder:
                                 # Add the buffered audio
                                 # to the recording frames
                                 self.frames.extend(list(self.audio_buffer))
+                                self.new_frames.set()
                                 self.audio_buffer.clear()
 
                             self.silero_vad_model.reset_states()
@@ -1238,6 +1243,7 @@ class AudioToTextRecorder:
 
                 if self.is_recording:
                     self.frames.append(data)
+                    self.new_frames.set()
 
                 if not self.is_recording or self.speech_end_silence_start:
                     self.audio_buffer.append(data)
@@ -1271,6 +1277,8 @@ class AudioToTextRecorder:
 
                 # Check if the recording is active
                 if self.is_recording:
+                    self.new_frames.wait()
+                    self.new_frames.clear()
 
                     # Sleep for the duration of the transcription resolution
                     time.sleep(self.realtime_processing_pause)