realtime_loop_test.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QTextEdit, QPushButton
  2. from PyQt5.QtGui import QFont
  3. from PyQt5.QtCore import pyqtSignal
  4. import sys
  5. import os
  6. from RealtimeTTS import TextToAudioStream, AzureEngine
  7. from RealtimeSTT import AudioToTextRecorder
  8. if __name__ == '__main__':
  9. class SimpleApp(QWidget):
  10. update_stt_text_signal = pyqtSignal(str)
  11. update_tts_text_signal = pyqtSignal(str)
  12. def __init__(self):
  13. super().__init__()
  14. layout = QVBoxLayout()
  15. font = QFont()
  16. font.setPointSize(18)
  17. self.input_text = QTextEdit(self)
  18. self.input_text.setFont(font)
  19. self.input_text.setPlaceholderText("Input")
  20. self.input_text.setMinimumHeight(100)
  21. layout.addWidget(self.input_text)
  22. self.button_speak_input = QPushButton("Speak and detect input text", self)
  23. self.button_speak_input.setFont(font)
  24. self.button_speak_input.clicked.connect(self.speak_input)
  25. layout.addWidget(self.button_speak_input)
  26. self.tts_text = QTextEdit(self)
  27. self.tts_text.setFont(font)
  28. self.tts_text.setPlaceholderText("STT (final)")
  29. self.tts_text.setMinimumHeight(100)
  30. self.tts_text.setReadOnly(True)
  31. layout.addWidget(self.tts_text)
  32. self.stt_text = QTextEdit(self)
  33. self.stt_text.setFont(font)
  34. self.stt_text.setPlaceholderText("STT (realtime)")
  35. self.stt_text.setMinimumHeight(100)
  36. layout.addWidget(self.stt_text)
  37. self.button_speak_stt = QPushButton("Speak detected text again", self)
  38. self.button_speak_stt.setFont(font)
  39. self.button_speak_stt.clicked.connect(self.speak_stt)
  40. layout.addWidget(self.button_speak_stt)
  41. self.setLayout(layout)
  42. self.setWindowTitle("Realtime TTS/STT Loop Test")
  43. self.resize(800, 600)
  44. self.update_stt_text_signal.connect(self.actual_update_stt_text)
  45. self.update_tts_text_signal.connect(self.actual_update_tts_text)
  46. self.stream = TextToAudioStream(AzureEngine(os.environ.get("AZURE_SPEECH_KEY"), "germanywestcentral"), on_audio_stream_stop=self.audio_stream_stop)
  47. recorder_config = {
  48. 'spinner': False,
  49. 'model': 'large-v2',
  50. 'language': 'en',
  51. 'silero_sensitivity': 0.01,
  52. 'webrtc_sensitivity': 3,
  53. 'post_speech_silence_duration': 0.01,
  54. 'min_length_of_recording': 0.2,
  55. 'min_gap_between_recordings': 0,
  56. 'enable_realtime_transcription': True,
  57. 'realtime_processing_pause': 0,
  58. 'realtime_model_type': 'small.en',
  59. 'on_realtime_transcription_stabilized': self.text_detected,
  60. }
  61. self.recorder = AudioToTextRecorder(**recorder_config)
  62. def speak_stt(self):
  63. text = self.stt_text.toPlainText()
  64. self.speak(text)
  65. def speak_input(self):
  66. text = self.input_text.toPlainText()
  67. self.speak(text)
  68. def text_detected(self, text):
  69. self.update_stt_text_signal.emit(text)
  70. def audio_stream_stop(self):
  71. self.stream.stop()
  72. self.recorder.stop()
  73. detected_text = self.recorder.text()
  74. self.update_stt_text_signal.emit(detected_text)
  75. self.update_tts_text_signal.emit(detected_text)
  76. def speak(self, text):
  77. self.stt_text.clear()
  78. self.stream.feed(text)
  79. self.recorder.start()
  80. self.stream.play_async()
  81. def actual_update_stt_text(self, text):
  82. self.stt_text.setText(text)
  83. def actual_update_tts_text(self, text):
  84. self.tts_text.setText(text)
  85. def closeEvent(self, event):
  86. if self.recorder:
  87. self.recorder.shutdown()
  88. app = QApplication(sys.argv)
  89. window = SimpleApp()
  90. window.show()
  91. sys.exit(app.exec_())