9 달 전 · 1d8da19ac8
--- a/tests/realtimestt_test.py
+++ b/tests/realtimestt_test.py
@@ -9,8 +9,11 @@ if __name__ == '__main__':
 
				     import os
			
 
				     import sys
			
 
				     from RealtimeSTT import AudioToTextRecorder
			
 
				-    from colorama import Fore, Back, Style
			
 
				+    from colorama import Fore, Style
			
 
				     import colorama
			
 
				+    from rich.live import Live
			
 
				+    from rich.console import Console
			
 
				+    from rich.text import Text
			
 
				 
			
 
				     if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
			
 
				         from torchaudio._extension.utils import _init_dll_path
			
@@ -20,40 +23,58 @@ if __name__ == '__main__':
 
				 
			
 
				     colorama.init()
			
 
				 
			
 
				+    # Create the Rich console and start the Live display
			
 
				+    console = Console()
			
 
				+    live = Live(console=console, refresh_per_second=10, screen=False)
			
 
				+    live.start()
			
 
				+
			
 
				     full_sentences = []
			
 
				     displayed_text = ""
			
 
				     prev_text = ""
			
 
				+    rich_text_stored = ""
			
 
				     recorder = None
			
 
				 
			
 
				     end_of_sentence_detection_pause = 0.4
			
 
				-    mid_sentence_detection_pause = 0.7
			
 
				+    unknown_sentence_detection_pause = 0.7
			
 
				+    mid_sentence_detection_pause = 2.0
			
 
				 
			
 
				     def clear_console():
			
 
				         os.system('clear' if os.name == 'posix' else 'cls')
			
 
				 
			
 
				     def text_detected(text):
			
 
				-        global displayed_text, prev_text
			
 
				+        global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
			
 
				         sentence_end_marks = ['.', '!', '?', '。'] 
			
 
				-        if text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
			
 
				+        if text.endswith("..."):
			
 
				+            recorder.post_speech_silence_duration = mid_sentence_detection_pause
			
 
				+        elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
			
 
				             recorder.post_speech_silence_duration = end_of_sentence_detection_pause
			
 
				         else:
			
 
				-            recorder.post_speech_silence_duration = mid_sentence_detection_pause
			
 
				+            recorder.post_speech_silence_duration = unknown_sentence_detection_pause
			
 
				 
			
 
				         prev_text = text
			
 
				 
			
 
				-        sentences_with_style = [
			
 
				-            f"{Fore.YELLOW + sentence + Style.RESET_ALL if i % 2 == 0 else Fore.CYAN + sentence + Style.RESET_ALL} "
			
 
				-            for i, sentence in enumerate(full_sentences)
			
 
				-        ]
			
 
				-        new_text = "".join(sentences_with_style).strip() + " " + text if len(sentences_with_style) > 0 else text
			
 
				-
			
 
				-        if new_text != displayed_text:
			
 
				-            displayed_text = new_text
			
 
				-            clear_console()
			
 
				-            print(displayed_text, end="", flush=True)
			
 
				+        # Build Rich Text with alternating colors
			
 
				+        rich_text = Text()
			
 
				+        for i, sentence in enumerate(full_sentences):
			
 
				+            if i % 2 == 0:
			
 
				+                rich_text += Text(sentence, style="yellow") + Text(" ")
			
 
				+            else:
			
 
				+                rich_text += Text(sentence, style="cyan") + Text(" ")
			
 
				+        
			
 
				+        # Append the current realtime text, if non-empty, after the completed sentences
			
 
				+        if text:
			
 
				+            rich_text += Text(text, style="white")
			
 
				+
			
 
				+        new_displayed_text = rich_text.plain
			
 
				+
			
 
				+        if new_displayed_text != displayed_text:
			
 
				+            displayed_text = new_displayed_text
			
 
				+            live.update(rich_text)
			
 
				+            rich_text_stored = rich_text
			
 
				 
			
 
				     def process_text(text):
			
 
				-        recorder.post_speech_silence_duration = end_of_sentence_detection_pause
			
 
				+        global recorder, full_sentences, prev_text
			
 
				+        recorder.post_speech_silence_duration = unknown_sentence_detection_pause
			
 
				         full_sentences.append(text)
			
 
				         prev_text = ""
			
 
				         text_detected("")
			
@@ -62,23 +83,23 @@ if __name__ == '__main__':
 
				     recorder_config = {
			
 
				         'spinner': False,
			
 
				         'model': 'large-v2',
			
 
				+        # 'input_device_index': 1,
			
 
				         'realtime_model_type': 'tiny.en',
			
 
				         'language': 'en',
			
 
				-        'input_device_index': 1,
			
 
				         'silero_sensitivity': 0.05,
			
 
				         'webrtc_sensitivity': 3,
			
 
				-        'post_speech_silence_duration': end_of_sentence_detection_pause,
			
 
				-        'min_length_of_recording': 0,
			
 
				+        'post_speech_silence_duration': unknown_sentence_detection_pause,
			
 
				+        'min_length_of_recording': 0.7,        
			
 
				         'min_gap_between_recordings': 0,                
			
 
				         'enable_realtime_transcription': True,
			
 
				         'realtime_processing_pause': 0.1,
			
 
				-        'on_realtime_transcription_update': text_detected,
			
 
				+        #'on_realtime_transcription_update': text_detected,
			
 
				+        'on_realtime_transcription_stabilized': text_detected,
			
 
				         'silero_deactivity_detection': True,
			
 
				-        'min_length_of_recording': 0.7,        
			
 
				         'early_transcription_on_silence': 0.2,
			
 
				         'beam_size': 5,
			
 
				         'beam_size_realtime': 1,
			
 
				-        'no_log_file': False,
			
 
				+        'no_log_file': True,
			
 
				     }
			
 
				 
			
 
				     if EXTENDED_LOGGING:
			
@@ -86,12 +107,13 @@ if __name__ == '__main__':
 
				 
			
 
				     recorder = AudioToTextRecorder(**recorder_config)
			
 
				 
			
 
				-    clear_console()
			
 
				-    print("Say something...", end="", flush=True)
			
 
				-
			
 
				+    # Initial display message
			
 
				+    initial_text = Text("Say something...", style="green")
			
 
				+    live.update(initial_text)
			
 
				 
			
 
				     try:
			
 
				-        while (True):
			
 
				+        while True:
			
 
				             recorder.text(process_text)
			
 
				     except KeyboardInterrupt:
			
 
				-        print("Exiting application due to keyboard interrupt")
			
 
				+        live.stop()
			
 
				+        print("Exit due to keyboard interrupt.")