11 months ago · 60f11672de
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ https://github.com/KoljaB/RealtimeSTT/assets/7604638/207cb9a2-4482-48e7-9d2b-072
 
				 
			
 
				 ### Updates
			
 
				 
			
 
				-Latest Version: v0.2.1
			
 
				+Latest Version: v0.2.2
			
 
				 
			
 
				 See [release history](https://github.com/KoljaB/RealtimeSTT/releases).
			
 
				 
			
@@ -333,6 +333,8 @@ When you initialize the `AudioToTextRecorder` class, you have various options to
 
				 
			
 
				 - **silero_use_onnx** (bool, default=False): Enables usage of the pre-trained model from Silero in the ONNX (Open Neural Network Exchange) format instead of the PyTorch format. Default is False. Recommended for faster performance.
			
 
				 
			
 
				+- **silero_deactivity_detection** (bool, default=False): Enables the Silero model for end-of-speech detection. This is more robust against background noise: it uses additional GPU resources but improves accuracy in noisy environments. When False, the default WebRTC VAD is used, which is more sensitive but may continue recording longer due to background sounds.
			
 
				+
			
 
				 - **webrtc_sensitivity** (int, default=3): Sensitivity for the WebRTC Voice Activity Detection engine ranging from 0 (least aggressive / most sensitive) to 3 (most aggressive, least sensitive). Default is 3.
			
 
				 
			
 
				 - **post_speech_silence_duration** (float, default=0.2): Duration in seconds of silence that must follow speech before the recording is considered to be completed. This ensures that any brief pauses during speech don't prematurely end the recording.
			
--- a/RealtimeSTT/audio_recorder.py
+++ b/RealtimeSTT/audio_recorder.py
@@ -115,6 +115,7 @@ class AudioToTextRecorder:
 
				                  # Voice activation parameters
			
 
				                  silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
			
 
				                  silero_use_onnx: bool = False,
			
 
				+                 silero_deactivity_detection: bool = False,
			
 
				                  webrtc_sensitivity: int = INIT_WEBRTC_SENSITIVITY,
			
 
				                  post_speech_silence_duration: float = (
			
 
				                      INIT_POST_SPEECH_SILENCE_DURATION
			
@@ -228,6 +229,12 @@ class AudioToTextRecorder:
 
				             pre-trained model from Silero in the ONNX (Open Neural Network
			
 
				             Exchange) format instead of the PyTorch format. This is
			
 
				             recommended for faster performance.
			
 
				+        - silero_deactivity_detection (bool, default=False): Enables the Silero
			
 
				+            model for end-of-speech detection. More robust against background
			
 
				+            noise. Utilizes additional GPU resources but improves accuracy in
			
 
				+            noisy environments. When False, uses the default WebRTC VAD,
			
 
				+            which is more sensitive but may continue recording longer due
			
 
				+            to background sounds.
			
 
				         - webrtc_sensitivity (int, default=WEBRTC_SENSITIVITY): Sensitivity
			
 
				             for the WebRTC Voice Activity Detection engine ranging from 0
			
 
				             (least aggressive / most sensitive) to 3 (most aggressive,
			
@@ -381,6 +388,7 @@ class AudioToTextRecorder:
 
				         self.silero_working = False
			
 
				         self.speech_end_silence_start = 0
			
 
				         self.silero_sensitivity = silero_sensitivity
			
 
				+        self.silero_deactivity_detection = silero_deactivity_detection
			
 
				         self.listen_start = 0
			
 
				         self.spinner = spinner
			
 
				         self.halo = None
			
@@ -1350,14 +1358,16 @@ class AudioToTextRecorder:
 
				 
			
 
				                     # Stop the recording if silence is detected after speech
			
 
				                     if self.stop_recording_on_voice_deactivity:
			
 
				+                        is_speech = (
			
 
				+                            self._is_silero_speech(data) if self.silero_deactivity_detection
			
 
				+                            else self._is_webrtc_speech(data, True)
			
 
				+                        )
			
 
				 
			
 
				-                        if not self._is_webrtc_speech(data, True):
			
 
				-
			
 
				+                        if not is_speech:
			
 
				                             # Voice deactivity was detected, so we start
			
 
				                             # measuring silence time before stopping recording
			
 
				                             if self.speech_end_silence_start == 0:
			
 
				                                 self.speech_end_silence_start = time.time()
			
 
				-
			
 
				                         else:
			
 
				                             self.speech_end_silence_start = 0