# realtimestt_test.py (2.4 KB)
  1. if __name__ == '__main__':
  2. EXTENDED_LOGGING = False
  3. import os
  4. import sys
  5. if os.name == "nt" and (3, 8) <= sys.version_info < (3, 99):
  6. from torchaudio._extension.utils import _init_dll_path
  7. _init_dll_path()
  8. if EXTENDED_LOGGING:
  9. import logging
  10. logging.basicConfig(level=logging.DEBUG)
  11. from RealtimeSTT import AudioToTextRecorder
  12. from colorama import Fore, Back, Style
  13. import colorama
  14. print("Initializing RealtimeSTT test...")
  15. colorama.init()
  16. full_sentences = []
  17. displayed_text = ""
  18. def clear_console():
  19. os.system('clear' if os.name == 'posix' else 'cls')
  20. def text_detected(text):
  21. global displayed_text
  22. sentences_with_style = [
  23. f"{Fore.YELLOW + sentence + Style.RESET_ALL if i % 2 == 0 else Fore.CYAN + sentence + Style.RESET_ALL} "
  24. for i, sentence in enumerate(full_sentences)
  25. ]
  26. new_text = "".join(sentences_with_style).strip() + " " + text if len(sentences_with_style) > 0 else text
  27. if new_text != displayed_text:
  28. displayed_text = new_text
  29. clear_console()
  30. print(f"Language: {recorder.detected_language} (realtime: {recorder.detected_realtime_language})")
  31. print(displayed_text, end="", flush=True)
  32. def process_text(text):
  33. full_sentences.append(text)
  34. text_detected("")
  35. recorder_config = {
  36. 'spinner': False,
  37. 'model': 'large-v2',
  38. 'realtime_model_type': 'tiny',
  39. 'language': 'en',
  40. 'silero_sensitivity': 0.05,
  41. 'webrtc_sensitivity': 3,
  42. 'post_speech_silence_duration': 0.4,
  43. 'min_length_of_recording': 0,
  44. 'min_gap_between_recordings': 0,
  45. 'enable_realtime_transcription': False,
  46. 'realtime_processing_pause': 0,
  47. 'on_realtime_transcription_update': text_detected,
  48. 'silero_deactivity_detection': True,
  49. 'min_length_of_recording': 0.5,
  50. 'early_transcription_on_silence': False
  51. }
  52. # Conditionally add logging level if EXTENDED_LOGGING is True
  53. if EXTENDED_LOGGING:
  54. recorder_config['level'] = logging.DEBUG
  55. recorder = AudioToTextRecorder(**recorder_config)
  56. clear_console()
  57. print("Say something...", end="", flush=True)
  58. try:
  59. while (True):
  60. recorder.text(process_text)
  61. except KeyboardInterrupt:
  62. print("Exiting application due to keyboard interrupt")