1 سال پیش · af1547656c
--- a/tests/realtimestt_test.py
+++ b/tests/realtimestt_test.py
@@ -6,43 +6,76 @@ if __name__ == '__main__':
 
															         import logging
														
 
															         logging.basicConfig(level=logging.DEBUG)
														
 
															+    from rich.console import Console
														
 
															+    from rich.live import Live
														
 
															+    from rich.text import Text
														
 
															+    from rich.panel import Panel
														
 
															+    from rich.spinner import Spinner
														
 
															+    from rich.progress import Progress, SpinnerColumn, TextColumn
														
 
															+    console = Console()
														
 
															+    # console.print("[bold yellow]System initializing, please wait...[/bold yellow]")
														
 
															+    console.print("System initializing, please wait")
														
 
															+
														
 
															+    
														
 
															+    # # Initial display message
														
 
															+    # with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), transient=True) as progress:
														
 
															+    #     task = progress.add_task("[cyan]Setting up transcription...", total=None)
														
 
															+    #     console.print("[bold yellow]System initializing, please wait...[/bold yellow]")
														
 
															+    #     progress.update(task, description="[green]Initialization complete!")
														
 
															+
														
 
															     import os
														
 
															     import sys
														
 
															     from RealtimeSTT import AudioToTextRecorder
														
 
															     from colorama import Fore, Style
														
 
															     import colorama
														
 
															-    from rich.live import Live
														
 
															-    from rich.console import Console
														
 
															-    from rich.text import Text
														
 
															     if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
														
 
															         from torchaudio._extension.utils import _init_dll_path
														
 
															         _init_dll_path()    
														
 
															-    print("Initializing RealtimeSTT test...")
														
 
															-
														
 
															     colorama.init()
														
 
															     # Initialize Rich Console and Live
														
 
															-    console = Console()
														
 
															     live = Live(console=console, refresh_per_second=10, screen=False)
														
 
															     live.start()
														
 
															     full_sentences = []
														
 
															-    displayed_text = ""
														
 
															-    prev_text = ""
														
 
															     rich_text_stored = ""
														
 
															     recorder = None
														
 
															+    displayed_text = ""  # Used for tracking text that was already displayed
														
 
															-    end_of_sentence_detection_pause = 0.4
														
 
															+    end_of_sentence_detection_pause = 0.45
														
 
															     unknown_sentence_detection_pause = 0.7
														
 
															     mid_sentence_detection_pause = 2.0
														
 
															     def clear_console():
														
 
															         os.system('clear' if os.name == 'posix' else 'cls')
														
 
															+    prev_text = ""
														
 
															+
														
 
															+    def preprocess_text(text):
														
 
															+        # Remove leading whitespace
														
 
															+        text = text.lstrip()
														
 
															+
														
 
															+        # Remove a leading ellipsis if present
														
 
															+        if text.startswith("..."):
														
 
															+            text = text[3:]
														
 
															+
														
 
															+        # Remove any leading whitespace again after removing the ellipsis
														
 
															+        text = text.lstrip()
														
 
															+
														
 
															+        # Uppercase the first letter
														
 
															+        if text:
														
 
															+            text = text[0].upper() + text[1:]
														
 
															+        
														
 
															+        return text
														
 
															+
														
 
															+
														
 
															     def text_detected(text):
														
 
															-        global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
														
 
															+        global prev_text, displayed_text, rich_text_stored
														
 
															+
														
 
															+        text = preprocess_text(text)
														
 
															+
														
 
															         sentence_end_marks = ['.', '!', '?', '。'] 
														
 
															         if text.endswith("..."):
														
 
															             recorder.post_speech_silence_duration = mid_sentence_detection_pause
														
@@ -57,24 +90,31 @@ if __name__ == '__main__':
 
															         rich_text = Text()
														
 
															         for i, sentence in enumerate(full_sentences):
														
 
															             if i % 2 == 0:
														
 
															+                #rich_text += Text(sentence, style="bold yellow") + Text(" ")
														
 
															                 rich_text += Text(sentence, style="yellow") + Text(" ")
														
 
															             else:
														
 
															                 rich_text += Text(sentence, style="cyan") + Text(" ")
														
 
															         # If the current text does not end a sentence, display it in real time
														
 
															         if text:
														
 
															-            rich_text += Text(text, style="white")
														
 
															+            rich_text += Text(text, style="bold yellow")
														
 
															         new_displayed_text = rich_text.plain
														
 
															         if new_displayed_text != displayed_text:
														
 
															             displayed_text = new_displayed_text
														
 
															-            live.update(rich_text)
														
 
															+            panel = Panel(rich_text, title="[bold green]Live Transcription[/bold green]", border_style="bold green")
														
 
															+            live.update(panel)
														
 
															             rich_text_stored = rich_text
														
 
															     def process_text(text):
														
 
															         global recorder, full_sentences, prev_text
														
 
															         recorder.post_speech_silence_duration = unknown_sentence_detection_pause
														
 
															+        text = preprocess_text(text)
														
 
															+        text = text.rstrip()
														
 
															+        if text.endswith("..."):
														
 
															+            text = text[:-2]
														
 
															+                
														
 
															         full_sentences.append(text)
														
 
															         prev_text = ""
														
 
															         text_detected("")
														
@@ -83,32 +123,33 @@ if __name__ == '__main__':
 
															     recorder_config = {
														
 
															         'spinner': False,
														
 
															         'model': 'large-v2',
														
 
															-        # 'input_device_index': 1,
														
 
															-        'realtime_model_type': 'tiny.en',
														
 
															+        'input_device_index': 1,
														
 
															+        'realtime_model_type': 'small.en',
														
 
															+        #'realtime_model_type': 'small.en',
														
 
															         'language': 'en',
														
 
															         'silero_sensitivity': 0.05,
														
 
															         'webrtc_sensitivity': 3,
														
 
															         'post_speech_silence_duration': unknown_sentence_detection_pause,
														
 
															-        'min_length_of_recording': 0.7,        
														
 
															+        'min_length_of_recording': 1.1,        
														
 
															         'min_gap_between_recordings': 0,                
														
 
															         'enable_realtime_transcription': True,
														
 
															-        'realtime_processing_pause': 0.1,
														
 
															-        #'on_realtime_transcription_update': text_detected,
														
 
															-        'on_realtime_transcription_stabilized': text_detected,
														
 
															+        'realtime_processing_pause': 0.02,
														
 
															+        'on_realtime_transcription_update': text_detected,
														
 
															+        #'on_realtime_transcription_stabilized': text_detected,
														
 
															         'silero_deactivity_detection': True,
														
 
															         'early_transcription_on_silence': 0.2,
														
 
															         'beam_size': 5,
														
 
															-        'beam_size_realtime': 1,
														
 
															+        'beam_size_realtime': 5,
														
 
															         'no_log_file': True,
														
 
															+        'initial_prompt': "Only add a period at the end of a sentence if you are 100 percent certain that the speaker has finished their statement. If you're unsure or the sentence seems incomplete, leave the sentence open or use ellipses to reflect continuation. For example: 'I went to the...' or 'I think it was...'"
														
 
															     }
														
 
															     if EXTENDED_LOGGING:
														
 
															         recorder_config['level'] = logging.DEBUG
														
 
															     recorder = AudioToTextRecorder(**recorder_config)
														
 
															-
														
 
															-    # Initial display message
														
 
															-    initial_text = Text("Say something...", style="green")
														
 
															+    
														
 
															+    initial_text = Panel(Text("Say something...", style="cyan bold"), title="[bold yellow]Waiting for Input[/bold yellow]", border_style="bold yellow")
														
 
															     live.update(initial_text)
														
 
															     try:
														
@@ -116,4 +157,5 @@ if __name__ == '__main__':
 
															             recorder.text(process_text)
														
 
															     except KeyboardInterrupt:
														
 
															         live.stop()
														
 
															-        print("Exit due to keyboard interrupt.")
														
 
															+        console.print("[bold red]Transcription stopped by user. Exiting...[/bold red]")
														
 
															+        exit(0)
														
--- a/tests/realtimestt_test_stereomix.py
+++ b/tests/realtimestt_test_stereomix.py
@@ -110,7 +110,7 @@ def main():
 
															         text = preprocess_text(text)
														
 
															         text = text.rstrip()
														
 
															         if text.endswith("..."):
														
 
															-            text = text[:-3]  # Remove ellipsis
														
 
															+            text = text[:-2]  # Drop two of the trailing dots so the ellipsis becomes a single period
														
 
															         full_sentences.append(text)
														
 
															         prev_text = ""