|
@@ -9,8 +9,11 @@ if __name__ == '__main__':
|
|
import os
|
|
import os
|
|
import sys
|
|
import sys
|
|
from RealtimeSTT import AudioToTextRecorder
|
|
from RealtimeSTT import AudioToTextRecorder
|
|
- from colorama import Fore, Back, Style
|
|
|
|
|
|
+ from colorama import Fore, Style
|
|
import colorama
|
|
import colorama
|
|
|
|
+ from rich.live import Live
|
|
|
|
+ from rich.console import Console
|
|
|
|
+ from rich.text import Text
|
|
|
|
|
|
if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
|
|
if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
|
|
from torchaudio._extension.utils import _init_dll_path
|
|
from torchaudio._extension.utils import _init_dll_path
|
|
@@ -20,40 +23,58 @@ if __name__ == '__main__':
|
|
|
|
|
|
colorama.init()
|
|
colorama.init()
|
|
|
|
|
|
|
|
+ # Initialize Rich Console and Live
|
|
|
|
+ console = Console()
|
|
|
|
+ live = Live(console=console, refresh_per_second=10, screen=False)
|
|
|
|
+ live.start()
|
|
|
|
+
|
|
full_sentences = []
|
|
full_sentences = []
|
|
displayed_text = ""
|
|
displayed_text = ""
|
|
prev_text = ""
|
|
prev_text = ""
|
|
|
|
+ rich_text_stored = ""
|
|
recorder = None
|
|
recorder = None
|
|
|
|
|
|
end_of_sentence_detection_pause = 0.4
|
|
end_of_sentence_detection_pause = 0.4
|
|
- mid_sentence_detection_pause = 0.7
|
|
|
|
|
|
+ unknown_sentence_detection_pause = 0.7
|
|
|
|
+ mid_sentence_detection_pause = 2.0
|
|
|
|
|
|
def clear_console():
|
|
def clear_console():
|
|
os.system('clear' if os.name == 'posix' else 'cls')
|
|
os.system('clear' if os.name == 'posix' else 'cls')
|
|
|
|
|
|
def text_detected(text):
|
|
def text_detected(text):
|
|
- global displayed_text, prev_text
|
|
|
|
|
|
+ global displayed_text, prev_text, full_sentences, recorder, rich_text_stored
|
|
sentence_end_marks = ['.', '!', '?', '。']
|
|
sentence_end_marks = ['.', '!', '?', '。']
|
|
- if text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
|
|
|
|
|
|
+ if text.endswith("..."):
|
|
|
|
+ recorder.post_speech_silence_duration = mid_sentence_detection_pause
|
|
|
|
+ elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
|
|
recorder.post_speech_silence_duration = end_of_sentence_detection_pause
|
|
recorder.post_speech_silence_duration = end_of_sentence_detection_pause
|
|
else:
|
|
else:
|
|
- recorder.post_speech_silence_duration = mid_sentence_detection_pause
|
|
|
|
|
|
+ recorder.post_speech_silence_duration = unknown_sentence_detection_pause
|
|
|
|
|
|
prev_text = text
|
|
prev_text = text
|
|
|
|
|
|
- sentences_with_style = [
|
|
|
|
- f"{Fore.YELLOW + sentence + Style.RESET_ALL if i % 2 == 0 else Fore.CYAN + sentence + Style.RESET_ALL} "
|
|
|
|
- for i, sentence in enumerate(full_sentences)
|
|
|
|
- ]
|
|
|
|
- new_text = "".join(sentences_with_style).strip() + " " + text if len(sentences_with_style) > 0 else text
|
|
|
|
-
|
|
|
|
- if new_text != displayed_text:
|
|
|
|
- displayed_text = new_text
|
|
|
|
- clear_console()
|
|
|
|
- print(displayed_text, end="", flush=True)
|
|
|
|
|
|
+ # Build Rich Text with alternating colors
|
|
|
|
+ rich_text = Text()
|
|
|
|
+ for i, sentence in enumerate(full_sentences):
|
|
|
|
+ if i % 2 == 0:
|
|
|
|
+ rich_text += Text(sentence, style="yellow") + Text(" ")
|
|
|
|
+ else:
|
|
|
|
+ rich_text += Text(sentence, style="cyan") + Text(" ")
|
|
|
|
+
|
|
|
|
+ # If the current text is not a sentence-ending, display it in real-time
|
|
|
|
+ if text:
|
|
|
|
+ rich_text += Text(text, style="white")
|
|
|
|
+
|
|
|
|
+ new_displayed_text = rich_text.plain
|
|
|
|
+
|
|
|
|
+ if new_displayed_text != displayed_text:
|
|
|
|
+ displayed_text = new_displayed_text
|
|
|
|
+ live.update(rich_text)
|
|
|
|
+ rich_text_stored = rich_text
|
|
|
|
|
|
def process_text(text):
|
|
def process_text(text):
|
|
- recorder.post_speech_silence_duration = end_of_sentence_detection_pause
|
|
|
|
|
|
+ global recorder, full_sentences, prev_text
|
|
|
|
+ recorder.post_speech_silence_duration = unknown_sentence_detection_pause
|
|
full_sentences.append(text)
|
|
full_sentences.append(text)
|
|
prev_text = ""
|
|
prev_text = ""
|
|
text_detected("")
|
|
text_detected("")
|
|
@@ -62,23 +83,23 @@ if __name__ == '__main__':
|
|
recorder_config = {
|
|
recorder_config = {
|
|
'spinner': False,
|
|
'spinner': False,
|
|
'model': 'large-v2',
|
|
'model': 'large-v2',
|
|
|
|
+ # 'input_device_index': 1,
|
|
'realtime_model_type': 'tiny.en',
|
|
'realtime_model_type': 'tiny.en',
|
|
'language': 'en',
|
|
'language': 'en',
|
|
- 'input_device_index': 1,
|
|
|
|
'silero_sensitivity': 0.05,
|
|
'silero_sensitivity': 0.05,
|
|
'webrtc_sensitivity': 3,
|
|
'webrtc_sensitivity': 3,
|
|
- 'post_speech_silence_duration': end_of_sentence_detection_pause,
|
|
|
|
- 'min_length_of_recording': 0,
|
|
|
|
|
|
+ 'post_speech_silence_duration': unknown_sentence_detection_pause,
|
|
|
|
+ 'min_length_of_recording': 0.7,
|
|
'min_gap_between_recordings': 0,
|
|
'min_gap_between_recordings': 0,
|
|
'enable_realtime_transcription': True,
|
|
'enable_realtime_transcription': True,
|
|
'realtime_processing_pause': 0.1,
|
|
'realtime_processing_pause': 0.1,
|
|
- 'on_realtime_transcription_update': text_detected,
|
|
|
|
|
|
+ #'on_realtime_transcription_update': text_detected,
|
|
|
|
+ 'on_realtime_transcription_stabilized': text_detected,
|
|
'silero_deactivity_detection': True,
|
|
'silero_deactivity_detection': True,
|
|
- 'min_length_of_recording': 0.7,
|
|
|
|
'early_transcription_on_silence': 0.2,
|
|
'early_transcription_on_silence': 0.2,
|
|
'beam_size': 5,
|
|
'beam_size': 5,
|
|
'beam_size_realtime': 1,
|
|
'beam_size_realtime': 1,
|
|
- 'no_log_file': False,
|
|
|
|
|
|
+ 'no_log_file': True,
|
|
}
|
|
}
|
|
|
|
|
|
if EXTENDED_LOGGING:
|
|
if EXTENDED_LOGGING:
|
|
@@ -86,12 +107,13 @@ if __name__ == '__main__':
|
|
|
|
|
|
recorder = AudioToTextRecorder(**recorder_config)
|
|
recorder = AudioToTextRecorder(**recorder_config)
|
|
|
|
|
|
- clear_console()
|
|
|
|
- print("Say something...", end="", flush=True)
|
|
|
|
-
|
|
|
|
|
|
+ # Initial display message
|
|
|
|
+ initial_text = Text("Say something...", style="green")
|
|
|
|
+ live.update(initial_text)
|
|
|
|
|
|
try:
|
|
try:
|
|
- while (True):
|
|
|
|
|
|
+ while True:
|
|
recorder.text(process_text)
|
|
recorder.text(process_text)
|
|
except KeyboardInterrupt:
|
|
except KeyboardInterrupt:
|
|
- print("Exiting application due to keyboard interrupt")
|
|
|
|
|
|
+ live.stop()
|
|
|
|
+ print("Exit due to keyboard interrupt.")
|