11 months ago · 1d8da19ac8
--- a/tests/realtimestt_test.py
+++ b/tests/realtimestt_test.py
@@ -9,8 +9,11 @@ if __name__ == '__main__':
 
															     import os
														
 
															     import sys
														
 
															     from RealtimeSTT import AudioToTextRecorder
														
 
															-    from colorama import Fore, Back, Style
														
 
															+    from colorama import Fore, Style
														
 
															     import colorama
														
 
															+    from rich.live import Live
														
 
															+    from rich.console import Console
														
 
															+    from rich.text import Text
														
 
															     if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
														
 
															         from torchaudio._extension.utils import _init_dll_path
														
@@ -20,40 +23,58 @@ if __name__ == '__main__':
 
															     colorama.init()
														
 
															+    # Initialize Rich Console and Live
														
 
															+    console = Console()
														
 
															+    live = Live(console=console, refresh_per_second=10, screen=False)
														
 
															+    live.start()
														
 
															+
														
 
															     full_sentences = []
														
 
															     displayed_text = ""
														
 
															     prev_text = ""
														
 
															+    rich_text_stored = ""
														
 
															     recorder = None
														
 
															     end_of_sentence_detection_pause = 0.4
														
 
															-    mid_sentence_detection_pause = 0.7
														
 
															+    unknown_sentence_detection_pause = 0.7
														
 
															+    mid_sentence_detection_pause = 2.0
														
 
															     def clear_console():
														
 
															         os.system('clear' if os.name == 'posix' else 'cls')
														
 
															     def text_detected(text):
														
 
															-        global displayed_text, prev_text
														
 
															+        global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
														
 
															         sentence_end_marks = ['.', '!', '?', '。'] 
														
 
															-        if text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
														
 
															+        if text.endswith("..."):
														
 
															+            recorder.post_speech_silence_duration = mid_sentence_detection_pause
														
 
															+        elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
														
 
															             recorder.post_speech_silence_duration = end_of_sentence_detection_pause
														
 
															         else:
														
 
															-            recorder.post_speech_silence_duration = mid_sentence_detection_pause
														
 
															+            recorder.post_speech_silence_duration = unknown_sentence_detection_pause
														
 
															         prev_text = text
														
 
															-        sentences_with_style = [
														
 
															-            f"{Fore.YELLOW + sentence + Style.RESET_ALL if i % 2 == 0 else Fore.CYAN + sentence + Style.RESET_ALL} "
														
 
															-            for i, sentence in enumerate(full_sentences)
														
 
															-        ]
														
 
															-        new_text = "".join(sentences_with_style).strip() + " " + text if len(sentences_with_style) > 0 else text
														
 
															-
														
 
															-        if new_text != displayed_text:
														
 
															-            displayed_text = new_text
														
 
															-            clear_console()
														
 
															-            print(displayed_text, end="", flush=True)
														
 
															+        # Build Rich Text with alternating colors
														
 
															+        rich_text = Text()
														
 
															+        for i, sentence in enumerate(full_sentences):
														
 
															+            if i % 2 == 0:
														
 
															+                rich_text += Text(sentence, style="yellow") + Text(" ")
														
 
															+            else:
														
 
															+                rich_text += Text(sentence, style="cyan") + Text(" ")
														
 
															+        
														
 
															+        # Append the current realtime text, if non-empty, in white after the completed sentences
														
 
															+        if text:
														
 
															+            rich_text += Text(text, style="white")
														
 
															+
														
 
															+        new_displayed_text = rich_text.plain
														
 
															+
														
 
															+        if new_displayed_text != displayed_text:
														
 
															+            displayed_text = new_displayed_text
														
 
															+            live.update(rich_text)
														
 
															+            rich_text_stored = rich_text
														
 
															     def process_text(text):
														
 
															-        recorder.post_speech_silence_duration = end_of_sentence_detection_pause
														
 
															+        global recorder, full_sentences, prev_text
														
 
															+        recorder.post_speech_silence_duration = unknown_sentence_detection_pause
														
 
															         full_sentences.append(text)
														
 
															         prev_text = ""
														
 
															         text_detected("")
														
@@ -62,23 +83,23 @@ if __name__ == '__main__':
 
															     recorder_config = {
														
 
															         'spinner': False,
														
 
															         'model': 'large-v2',
														
 
															+        # 'input_device_index': 1,
														
 
															         'realtime_model_type': 'tiny.en',
														
 
															         'language': 'en',
														
 
															-        'input_device_index': 1,
														
 
															         'silero_sensitivity': 0.05,
														
 
															         'webrtc_sensitivity': 3,
														
 
															-        'post_speech_silence_duration': end_of_sentence_detection_pause,
														
 
															-        'min_length_of_recording': 0,
														
 
															+        'post_speech_silence_duration': unknown_sentence_detection_pause,
														
 
															+        'min_length_of_recording': 0.7,        
														
 
															         'min_gap_between_recordings': 0,                
														
 
															         'enable_realtime_transcription': True,
														
 
															         'realtime_processing_pause': 0.1,
														
 
															-        'on_realtime_transcription_update': text_detected,
														
 
															+        #'on_realtime_transcription_update': text_detected,
														
 
															+        'on_realtime_transcription_stabilized': text_detected,
														
 
															         'silero_deactivity_detection': True,
														
 
															-        'min_length_of_recording': 0.7,        
														
 
															         'early_transcription_on_silence': 0.2,
														
 
															         'beam_size': 5,
														
 
															         'beam_size_realtime': 1,
														
 
															-        'no_log_file': False,
														
 
															+        'no_log_file': True,
														
 
															     }
														
 
															     if EXTENDED_LOGGING:
														
@@ -86,12 +107,13 @@ if __name__ == '__main__':
 
															     recorder = AudioToTextRecorder(**recorder_config)
														
 
															-    clear_console()
														
 
															-    print("Say something...", end="", flush=True)
														
 
															-
														
 
															+    # Initial display message
														
 
															+    initial_text = Text("Say something...", style="green")
														
 
															+    live.update(initial_text)
														
 
															     try:
														
 
															-        while (True):
														
 
															+        while True:
														
 
															             recorder.text(process_text)
														
 
															     except KeyboardInterrupt:
														
 
															-        print("Exiting application due to keyboard interrupt")
														
 
															+        live.stop()
														
 
															+        print("Exit due to keyboard interrupt.")