před 1 rokem · af1547656c
--- a/tests/realtimestt_test.py
+++ b/tests/realtimestt_test.py
@@ -6,43 +6,76 @@ if __name__ == '__main__':
 
				         import logging
			
 
				         logging.basicConfig(level=logging.DEBUG)
			
 
				 
			
 
				+    from rich.console import Console
			
 
				+    from rich.live import Live
			
 
				+    from rich.text import Text
			
 
				+    from rich.panel import Panel
			
 
				+    from rich.spinner import Spinner
			
 
				+    from rich.progress import Progress, SpinnerColumn, TextColumn
			
 
				+    console = Console()
			
 
				+    # console.print("[bold yellow]System initializing, please wait...[/bold yellow]")
			
 
				+    console.print("System initializing, please wait")
			
 
				+
			
 
				+    
			
 
				+    # # Initial display message
			
 
				+    # with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), transient=True) as progress:
			
 
				+    #     task = progress.add_task("[cyan]Setting up transcription...", total=None)
			
 
				+    #     console.print("[bold yellow]System initializing, please wait...[/bold yellow]")
			
 
				+    #     progress.update(task, description="[green]Initialization complete!")
			
 
				+
			
 
				     import os
			
 
				     import sys
			
 
				     from RealtimeSTT import AudioToTextRecorder
			
 
				     from colorama import Fore, Style
			
 
				     import colorama
			
 
				-    from rich.live import Live
			
 
				-    from rich.console import Console
			
 
				-    from rich.text import Text
			
 
				 
			
 
				     if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
			
 
				         from torchaudio._extension.utils import _init_dll_path
			
 
				         _init_dll_path()    
			
 
				 
			
 
				-    print("Initializing RealtimeSTT test...")
			
 
				-
			
 
				     colorama.init()
			
 
				 
			
 
				     # Initialize the Rich Live display (the Console is created earlier in the script)
			
 
				-    console = Console()
			
 
				     live = Live(console=console, refresh_per_second=10, screen=False)
			
 
				     live.start()
			
 
				 
			
 
				     full_sentences = []
			
 
				-    displayed_text = ""
			
 
				-    prev_text = ""
			
 
				     rich_text_stored = ""
			
 
				     recorder = None
			
 
				+    displayed_text = ""  # Used for tracking text that was already displayed
			
 
				 
			
 
				-    end_of_sentence_detection_pause = 0.4
			
 
				+    end_of_sentence_detection_pause = 0.45
			
 
				     unknown_sentence_detection_pause = 0.7
			
 
				     mid_sentence_detection_pause = 2.0
			
 
				 
			
 
				     def clear_console():  # wipe the terminal by shelling out to the platform's clear command
			
 
				         os.system('clear' if os.name == 'posix' else 'cls')  # 'clear' when os.name is 'posix', otherwise 'cls'
			
 
				 
			
 
				+    prev_text = ""
			
 
				+
			
 
				+    def preprocess_text(text):
			
 
				+        # Normalize a transcript fragment: strip leading whitespace and a leading "...", then capitalize the first character; returns the cleaned string.
			
 
				+        text = text.lstrip()  # strip leading whitespace
			
 
				+
			
 
				+        # Drop a leading ellipsis ("...") if present
			
 
				+        if text.startswith("..."):
			
 
				+            text = text[3:]
			
 
				+
			
 
				+        # Strip again: removing the ellipsis may expose more leading whitespace
			
 
				+        text = text.lstrip()
			
 
				+
			
 
				+        # Capitalize the first character; the guard skips empty strings, where text[0] would raise IndexError
			
 
				+        if text:
			
 
				+            text = text[0].upper() + text[1:]
			
 
				+        
			
 
				+        return text
			
 
				+
			
 
				+
			
 
				     def text_detected(text):
			
 
				-        global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
			
 
				+        global prev_text, displayed_text, rich_text_stored
			
 
				+
			
 
				+        text = preprocess_text(text)
			
 
				+
			
 
				         sentence_end_marks = ['.', '!', '?', '。'] 
			
 
				         if text.endswith("..."):
			
 
				             recorder.post_speech_silence_duration = mid_sentence_detection_pause
			
@@ -57,24 +90,31 @@ if __name__ == '__main__':
 
				         rich_text = Text()
			
 
				         for i, sentence in enumerate(full_sentences):
			
 
				             if i % 2 == 0:
			
 
				+                #rich_text += Text(sentence, style="bold yellow") + Text(" ")
			
 
				                 rich_text += Text(sentence, style="yellow") + Text(" ")
			
 
				             else:
			
 
				                 rich_text += Text(sentence, style="cyan") + Text(" ")
			
 
				         
			
 
				         # Append the in-progress (not yet finalized) text so it is shown in real time
			
 
				         if text:
			
 
				-            rich_text += Text(text, style="white")
			
 
				+            rich_text += Text(text, style="bold yellow")
			
 
				 
			
 
				         new_displayed_text = rich_text.plain
			
 
				 
			
 
				         if new_displayed_text != displayed_text:
			
 
				             displayed_text = new_displayed_text
			
 
				-            live.update(rich_text)
			
 
				+            panel = Panel(rich_text, title="[bold green]Live Transcription[/bold green]", border_style="bold green")
			
 
				+            live.update(panel)
			
 
				             rich_text_stored = rich_text
			
 
				 
			
 
				     def process_text(text):  # handler passed to recorder.text(); appends the cleaned text to full_sentences and re-renders
			
 
				         global recorder, full_sentences, prev_text
			
 
				         recorder.post_speech_silence_duration = unknown_sentence_detection_pause  # reset to the value recorder_config starts with
			
 
				+        text = preprocess_text(text)  # same cleanup that text_detected applies to its input
			
 
				+        text = text.rstrip()  # trailing whitespace would hide a final "..." from endswith below
			
 
				+        if text.endswith("..."):
			
 
				+            text = text[:-2]  # drops two of the three dots, so the stored sentence ends with a single "."
			
 
				+                
			
 
				         full_sentences.append(text)
			
 
				         prev_text = ""
			
 
				         text_detected("")  # empty text: presumably redraws only the stored sentences - part of text_detected is not visible in this diff
			
@@ -83,32 +123,33 @@ if __name__ == '__main__':
 
				     recorder_config = {
			
 
				         'spinner': False,
			
 
				         'model': 'large-v2',
			
 
				-        # 'input_device_index': 1,
			
 
				-        'realtime_model_type': 'tiny.en',
			
 
				+        'input_device_index': 1,
			
 
				+        'realtime_model_type': 'small.en',
			
 
				+        #'realtime_model_type': 'small.en',
			
 
				         'language': 'en',
			
 
				         'silero_sensitivity': 0.05,
			
 
				         'webrtc_sensitivity': 3,
			
 
				         'post_speech_silence_duration': unknown_sentence_detection_pause,
			
 
				-        'min_length_of_recording': 0.7,        
			
 
				+        'min_length_of_recording': 1.1,        
			
 
				         'min_gap_between_recordings': 0,                
			
 
				         'enable_realtime_transcription': True,
			
 
				-        'realtime_processing_pause': 0.1,
			
 
				-        #'on_realtime_transcription_update': text_detected,
			
 
				-        'on_realtime_transcription_stabilized': text_detected,
			
 
				+        'realtime_processing_pause': 0.02,
			
 
				+        'on_realtime_transcription_update': text_detected,
			
 
				+        #'on_realtime_transcription_stabilized': text_detected,
			
 
				         'silero_deactivity_detection': True,
			
 
				         'early_transcription_on_silence': 0.2,
			
 
				         'beam_size': 5,
			
 
				-        'beam_size_realtime': 1,
			
 
				+        'beam_size_realtime': 5,
			
 
				         'no_log_file': True,
			
 
				+        'initial_prompt': "Only add a period at the end of a sentence if you are 100 percent certain that the speaker has finished their statement. If you're unsure or the sentence seems incomplete, leave the sentence open or use ellipses to reflect continuation. For example: 'I went to the...' or 'I think it was...'"
			
 
				     }
			
 
				 
			
 
				     if EXTENDED_LOGGING:
			
 
				         recorder_config['level'] = logging.DEBUG
			
 
				 
			
 
				     recorder = AudioToTextRecorder(**recorder_config)
			
 
				-
			
 
				-    # Initial display message
			
 
				-    initial_text = Text("Say something...", style="green")
			
 
				+    
			
 
				+    initial_text = Panel(Text("Say something...", style="cyan bold"), title="[bold yellow]Waiting for Input[/bold yellow]", border_style="bold yellow")
			
 
				     live.update(initial_text)
			
 
				 
			
 
				     try:
			
@@ -116,4 +157,5 @@ if __name__ == '__main__':
 
				             recorder.text(process_text)
			
 
				     except KeyboardInterrupt:
			
 
				         live.stop()
			
 
				-        print("Exit due to keyboard interrupt.")
			
 
				+        console.print("[bold red]Transcription stopped by user. Exiting...[/bold red]")
			
 
				+        exit(0)
			
--- a/tests/realtimestt_test_stereomix.py
+++ b/tests/realtimestt_test_stereomix.py
@@ -110,7 +110,7 @@ def main():
 
				         text = preprocess_text(text)
			
 
				         text = text.rstrip()
			
 
				         if text.endswith("..."):
			
 
				-            text = text[:-3]  # Remove ellipsis
			
 
				+            text = text[:-2]  # Trim the "..." down to a single trailing "."
			
 
				 
			
 
				         full_sentences.append(text)
			
 
				         prev_text = ""