пре 10 месеци · 23e19a1331
--- a/tests/realtimestt_test_stereomix.py
+++ b/tests/realtimestt_test_stereomix.py
@@ -0,0 +1,233 @@
 
				+import os
			
 
				+import sys
			
 
				+import threading
			
 
				+import time
			
 
				+import pyaudio
			
 
				+from rich.console import Console
			
 
				+from rich.live import Live
			
 
				+from rich.text import Text
			
 
				+from rich.panel import Panel
			
 
				+from rich.spinner import Spinner
			
 
				+from rich.progress import Progress, SpinnerColumn, TextColumn
			
 
				+from colorama import Fore, Style, init as colorama_init
			
 
				+
			
 
				+from RealtimeSTT import AudioToTextRecorder 
			
 
				+
			
 
# Configuration Constants
# Substring matched case-insensitively against each PyAudio device name.
LOOPBACK_DEVICE_NAME = "stereomix"
# Value a device's 'hostApi' field must equal for the device to be selected.
LOOPBACK_DEVICE_HOST_API = 0
# Frames per buffer for the input stream and frames read per loop iteration.
BUFFER_SIZE = 512 
# Sample format, channel count and sample rate passed to PyAudio's open().
AUDIO_FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# When True, main() enables DEBUG logging and passes that level to the recorder.
EXTENDED_LOGGING = False
			
 
				+
			
 
				+def main():
			
 
				+    if EXTENDED_LOGGING:
			
 
				+        import logging
			
 
				+        logging.basicConfig(level=logging.DEBUG)
			
 
				+
			
 
				+    console = Console()
			
 
				+    console.print("System initializing, please wait")
			
 
				+
			
 
				+    colorama_init()
			
 
				+
			
 
				+    # Initialize Rich Console and Live
			
 
				+    live = Live(console=console, refresh_per_second=10, screen=False)
			
 
				+    live.start()
			
 
				+
			
 
				+    full_sentences = []
			
 
				+    rich_text_stored = ""
			
 
				+    recorder = None
			
 
				+    displayed_text = ""  # Used for tracking text that was already displayed
			
 
				+
			
 
				+    end_of_sentence_detection_pause = 0.2
			
 
				+    unknown_sentence_detection_pause = 0.5
			
 
				+    mid_sentence_detection_pause = 1
			
 
				+
			
 
				+    prev_text = ""
			
 
				+
			
 
				+    def clear_console():
			
 
				+        os.system('clear' if os.name == 'posix' else 'cls')
			
 
				+
			
 
				+    def preprocess_text(text):
			
 
				+        # Remove leading whitespaces
			
 
				+        text = text.lstrip()
			
 
				+
			
 
				+        # Remove starting ellipses if present
			
 
				+        if text.startswith("..."):
			
 
				+            text = text[3:]
			
 
				+
			
 
				+        # Remove any leading whitespaces again after ellipses removal
			
 
				+        text = text.lstrip()
			
 
				+
			
 
				+        # Uppercase the first letter
			
 
				+        if text:
			
 
				+            text = text[0].upper() + text[1:]
			
 
				+
			
 
				+        return text
			
 
				+
			
 
				+    def text_detected(text):
			
 
				+        nonlocal prev_text, displayed_text, rich_text_stored
			
 
				+
			
 
				+        text = preprocess_text(text)
			
 
				+
			
 
				+        sentence_end_marks = ['.', '!', '?', '。']
			
 
				+        midsentence_marks = ['…', '-', '(']
			
 
				+        if text.endswith("...") or text and text[-1] in midsentence_marks:
			
 
				+            recorder.post_speech_silence_duration = mid_sentence_detection_pause
			
 
				+        elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
			
 
				+            recorder.post_speech_silence_duration = end_of_sentence_detection_pause
			
 
				+        else:
			
 
				+            recorder.post_speech_silence_duration = unknown_sentence_detection_pause
			
 
				+
			
 
				+        prev_text = text
			
 
				+
			
 
				+        # Build Rich Text with alternating colors
			
 
				+        rich_text = Text()
			
 
				+        for i, sentence in enumerate(full_sentences):
			
 
				+            if i % 2 == 0:
			
 
				+                rich_text += Text(sentence, style="yellow") + Text(" ")
			
 
				+            else:
			
 
				+                rich_text += Text(sentence, style="cyan") + Text(" ")
			
 
				+
			
 
				+        # If the current text is not a sentence-ending, display it in real-time
			
 
				+        if text:
			
 
				+            rich_text += Text(text, style="bold yellow")
			
 
				+
			
 
				+        new_displayed_text = rich_text.plain
			
 
				+
			
 
				+        if new_displayed_text != displayed_text:
			
 
				+            displayed_text = new_displayed_text
			
 
				+            panel = Panel(rich_text, title="[bold green]Live Transcription[/bold green]", border_style="bold green")
			
 
				+            live.update(panel)
			
 
				+            rich_text_stored = rich_text
			
 
				+
			
 
				+    def process_text(text):
			
 
				+        nonlocal recorder, full_sentences, prev_text
			
 
				+        recorder.post_speech_silence_duration = unknown_sentence_detection_pause
			
 
				+        text = preprocess_text(text)
			
 
				+        text = text.rstrip()
			
 
				+        if text.endswith("..."):
			
 
				+            text = text[:-3]  # Remove ellipsis
			
 
				+
			
 
				+        full_sentences.append(text)
			
 
				+        prev_text = ""
			
 
				+        text_detected("")
			
 
				+
			
 
				+    # Recorder configuration
			
 
				+    recorder_config = {
			
 
				+        'spinner': False,
			
 
				+        'use_microphone': False,
			
 
				+        'model': 'large-v2',
			
 
				+        'input_device_index': None,  # To be set after finding the device
			
 
				+        'realtime_model_type': 'tiny.en',
			
 
				+        'language': 'en',
			
 
				+        'silero_sensitivity': 0.05,
			
 
				+        'webrtc_sensitivity': 3,
			
 
				+        'post_speech_silence_duration': unknown_sentence_detection_pause,
			
 
				+        'min_length_of_recording': 2.0,        
			
 
				+        'min_gap_between_recordings': 0,
			
 
				+        'enable_realtime_transcription': True,
			
 
				+        'realtime_processing_pause': 0.01,
			
 
				+        'on_realtime_transcription_update': text_detected,
			
 
				+        'silero_deactivity_detection': False,
			
 
				+        'early_transcription_on_silence': 0,
			
 
				+        'beam_size': 5,
			
 
				+        'beam_size_realtime': 1,
			
 
				+        'no_log_file': True,
			
 
				+        'initial_prompt': "Use ellipses for incomplete sentences like: I went to the..."
			
 
				+    }
			
 
				+
			
 
				+    if EXTENDED_LOGGING:
			
 
				+        recorder_config['level'] = logging.DEBUG
			
 
				+
			
 
				+    # Initialize PyAudio
			
 
				+    audio = pyaudio.PyAudio()
			
 
				+
			
 
				+    def find_stereo_mix_index():
			
 
				+        nonlocal audio
			
 
				+        devices_info = ""
			
 
				+        for i in range(audio.get_device_count()):
			
 
				+            dev = audio.get_device_info_by_index(i)
			
 
				+            devices_info += f"{dev['index']}: {dev['name']} (hostApi: {dev['hostApi']})\n"
			
 
				+
			
 
				+            if (LOOPBACK_DEVICE_NAME.lower() in dev['name'].lower()
			
 
				+                    and dev['hostApi'] == LOOPBACK_DEVICE_HOST_API):
			
 
				+                return dev['index'], devices_info
			
 
				+
			
 
				+        return None, devices_info
			
 
				+
			
 
				+    device_index, devices_info = find_stereo_mix_index()
			
 
				+    if device_index is None:
			
 
				+        live.stop()
			
 
				+        console.print("[bold red]Stereo Mix device not found. Available audio devices are:\n[/bold red]")
			
 
				+        console.print(devices_info, style="red")
			
 
				+        audio.terminate()
			
 
				+        sys.exit(1)
			
 
				+    else:
			
 
				+        recorder_config['input_device_index'] = device_index
			
 
				+        console.print(f"Using audio device index {device_index} for Stereo Mix.", style="green")
			
 
				+
			
 
				+    # Initialize the recorder
			
 
				+    recorder = AudioToTextRecorder(**recorder_config)
			
 
				+
			
 
				+    # Initialize Live Display with waiting message
			
 
				+    initial_text = Panel(Text("Say something...", style="cyan bold"), title="[bold yellow]Waiting for Input[/bold yellow]", border_style="bold yellow")
			
 
				+    live.update(initial_text)
			
 
				+
			
 
				+    # Define the recording thread
			
 
				+    def recording_thread():
			
 
				+        nonlocal recorder
			
 
				+        stream = audio.open(format=AUDIO_FORMAT,
			
 
				+                            channels=CHANNELS,
			
 
				+                            rate=RATE,
			
 
				+                            input=True,
			
 
				+                            frames_per_buffer=BUFFER_SIZE,
			
 
				+                            input_device_index=recorder_config['input_device_index'])
			
 
				+
			
 
				+        try:
			
 
				+            while not stop_event.is_set():
			
 
				+                data = stream.read(BUFFER_SIZE, exception_on_overflow=False)
			
 
				+                recorder.feed_audio(data)
			
 
				+        except Exception as e:
			
 
				+            console.print(f"[bold red]Error in recording thread: {e}[/bold red]")
			
 
				+        finally:
			
 
				+            console.print(f"[bold red]Stopping stream[/bold red]")
			
 
				+            stream.stop_stream()
			
 
				+            stream.close()
			
 
				+
			
 
				+    # Define the stop event
			
 
				+    stop_event = threading.Event()
			
 
				+
			
 
				+    # Start the recording thread
			
 
				+    thread = threading.Thread(target=recording_thread, daemon=True)
			
 
				+    thread.start()
			
 
				+
			
 
				+    try:
			
 
				+        while True:
			
 
				+            recorder.text(process_text)
			
 
				+    except KeyboardInterrupt:
			
 
				+        console.print("[bold red]\nTranscription stopped by user. Exiting...[/bold red]")
			
 
				+    finally:
			
 
				+        print("live stop")
			
 
				+        live.stop()
			
 
				+
			
 
				+        print("setting stop event")
			
 
				+        stop_event.set()
			
 
				+
			
 
				+        print("thread join")
			
 
				+        thread.join()
			
 
				+
			
 
				+        print("recorder stop")
			
 
				+        recorder.stop()
			
 
				+
			
 
				+        print("audio terminate")
			
 
				+        audio.terminate()
			
 
				+
			
 
				+        print("sys exit ")
			
 
				+        sys.exit(0)
			
 
				+
			
 
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()