소스 검색

using rich now

KoljaB 7개월 전
부모
커밋
1d8da19ac8
1개의 변경된 파일, 49개의 추가 및 27개의 삭제
  1. 49 27
      tests/realtimestt_test.py

+ 49 - 27
tests/realtimestt_test.py

@@ -9,8 +9,11 @@ if __name__ == '__main__':
     import os
     import sys
     from RealtimeSTT import AudioToTextRecorder
-    from colorama import Fore, Back, Style
+    from colorama import Fore, Style
     import colorama
+    from rich.live import Live
+    from rich.console import Console
+    from rich.text import Text
 
     if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
         from torchaudio._extension.utils import _init_dll_path
@@ -20,40 +23,58 @@ if __name__ == '__main__':
 
     colorama.init()
 
+    # Initialize Rich Console and Live
+    console = Console()
+    live = Live(console=console, refresh_per_second=10, screen=False)
+    live.start()
+
     full_sentences = []
     displayed_text = ""
     prev_text = ""
+    rich_text_stored = ""
     recorder = None
 
     end_of_sentence_detection_pause = 0.4
-    mid_sentence_detection_pause = 0.7
+    unknown_sentence_detection_pause = 0.7
+    mid_sentence_detection_pause = 2.0
 
     def clear_console():
         os.system('clear' if os.name == 'posix' else 'cls')
 
     def text_detected(text):
-        global displayed_text, prev_text
+        global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
         sentence_end_marks = ['.', '!', '?', '。'] 
-        if text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
+        if text.endswith("..."):
+            recorder.post_speech_silence_duration = mid_sentence_detection_pause
+        elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
             recorder.post_speech_silence_duration = end_of_sentence_detection_pause
         else:
-            recorder.post_speech_silence_duration = mid_sentence_detection_pause
+            recorder.post_speech_silence_duration = unknown_sentence_detection_pause
 
         prev_text = text
 
-        sentences_with_style = [
-            f"{Fore.YELLOW + sentence + Style.RESET_ALL if i % 2 == 0 else Fore.CYAN + sentence + Style.RESET_ALL} "
-            for i, sentence in enumerate(full_sentences)
-        ]
-        new_text = "".join(sentences_with_style).strip() + " " + text if len(sentences_with_style) > 0 else text
-
-        if new_text != displayed_text:
-            displayed_text = new_text
-            clear_console()
-            print(displayed_text, end="", flush=True)
+        # Build Rich Text with alternating colors
+        rich_text = Text()
+        for i, sentence in enumerate(full_sentences):
+            if i % 2 == 0:
+                rich_text += Text(sentence, style="yellow") + Text(" ")
+            else:
+                rich_text += Text(sentence, style="cyan") + Text(" ")
+        
+        # If the current text is not a sentence-ending, display it in real-time
+        if text:
+            rich_text += Text(text, style="white")
+
+        new_displayed_text = rich_text.plain
+
+        if new_displayed_text != displayed_text:
+            displayed_text = new_displayed_text
+            live.update(rich_text)
+            rich_text_stored = rich_text
 
     def process_text(text):
-        recorder.post_speech_silence_duration = end_of_sentence_detection_pause
+        global recorder, full_sentences, prev_text
+        recorder.post_speech_silence_duration = unknown_sentence_detection_pause
         full_sentences.append(text)
         prev_text = ""
         text_detected("")
@@ -62,23 +83,23 @@ if __name__ == '__main__':
     recorder_config = {
         'spinner': False,
         'model': 'large-v2',
+        # 'input_device_index': 1,
         'realtime_model_type': 'tiny.en',
         'language': 'en',
-        'input_device_index': 1,
         'silero_sensitivity': 0.05,
         'webrtc_sensitivity': 3,
-        'post_speech_silence_duration': end_of_sentence_detection_pause,
-        'min_length_of_recording': 0,
+        'post_speech_silence_duration': unknown_sentence_detection_pause,
+        'min_length_of_recording': 0.7,        
         'min_gap_between_recordings': 0,                
         'enable_realtime_transcription': True,
         'realtime_processing_pause': 0.1,
-        'on_realtime_transcription_update': text_detected,
+        #'on_realtime_transcription_update': text_detected,
+        'on_realtime_transcription_stabilized': text_detected,
         'silero_deactivity_detection': True,
-        'min_length_of_recording': 0.7,        
         'early_transcription_on_silence': 0.2,
         'beam_size': 5,
         'beam_size_realtime': 1,
-        'no_log_file': False,
+        'no_log_file': True,
     }
 
     if EXTENDED_LOGGING:
@@ -86,12 +107,13 @@ if __name__ == '__main__':
 
     recorder = AudioToTextRecorder(**recorder_config)
 
-    clear_console()
-    print("Say something...", end="", flush=True)
-
+    # Initial display message
+    initial_text = Text("Say something...", style="green")
+    live.update(initial_text)
 
     try:
-        while (True):
+        while True:
             recorder.text(process_text)
     except KeyboardInterrupt:
-        print("Exiting application due to keyboard interrupt")
+        live.stop()
+        print("Exit due to keyboard interrupt.")