123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QTextEdit, QPushButton
- from PyQt5.QtGui import QFont
- from PyQt5.QtCore import pyqtSignal
- import sys
- import os
- from RealtimeTTS import TextToAudioStream, AzureEngine
- from RealtimeSTT import AudioToTextRecorder
- if __name__ == '__main__':
- class SimpleApp(QWidget):
- update_stt_text_signal = pyqtSignal(str)
- update_tts_text_signal = pyqtSignal(str)
- def __init__(self):
- super().__init__()
- layout = QVBoxLayout()
- font = QFont()
- font.setPointSize(18)
- self.input_text = QTextEdit(self)
- self.input_text.setFont(font)
- self.input_text.setPlaceholderText("Input")
- self.input_text.setMinimumHeight(100)
- layout.addWidget(self.input_text)
- self.button_speak_input = QPushButton("Speak and detect input text", self)
- self.button_speak_input.setFont(font)
- self.button_speak_input.clicked.connect(self.speak_input)
- layout.addWidget(self.button_speak_input)
- self.tts_text = QTextEdit(self)
- self.tts_text.setFont(font)
- self.tts_text.setPlaceholderText("STT (final)")
- self.tts_text.setMinimumHeight(100)
- self.tts_text.setReadOnly(True)
- layout.addWidget(self.tts_text)
- self.stt_text = QTextEdit(self)
- self.stt_text.setFont(font)
- self.stt_text.setPlaceholderText("STT (realtime)")
- self.stt_text.setMinimumHeight(100)
- layout.addWidget(self.stt_text)
- self.button_speak_stt = QPushButton("Speak detected text again", self)
- self.button_speak_stt.setFont(font)
- self.button_speak_stt.clicked.connect(self.speak_stt)
- layout.addWidget(self.button_speak_stt)
- self.setLayout(layout)
- self.setWindowTitle("Realtime TTS/STT Loop Test")
- self.resize(800, 600)
- self.update_stt_text_signal.connect(self.actual_update_stt_text)
- self.update_tts_text_signal.connect(self.actual_update_tts_text)
- self.stream = TextToAudioStream(AzureEngine(os.environ.get("AZURE_SPEECH_KEY"), "germanywestcentral"), on_audio_stream_stop=self.audio_stream_stop)
- recorder_config = {
- 'spinner': False,
- 'model': 'large-v2',
- 'language': 'en',
- 'silero_sensitivity': 0.01,
- 'webrtc_sensitivity': 3,
- 'post_speech_silence_duration': 0.01,
- 'min_length_of_recording': 0.2,
- 'min_gap_between_recordings': 0,
- 'enable_realtime_transcription': True,
- 'realtime_processing_pause': 0,
- 'realtime_model_type': 'small.en',
- 'on_realtime_transcription_stabilized': self.text_detected,
- }
- self.recorder = AudioToTextRecorder(**recorder_config)
- def speak_stt(self):
- text = self.stt_text.toPlainText()
- self.speak(text)
- def speak_input(self):
- text = self.input_text.toPlainText()
- self.speak(text)
- def text_detected(self, text):
- self.update_stt_text_signal.emit(text)
- def audio_stream_stop(self):
- self.stream.stop()
- self.recorder.stop()
- detected_text = self.recorder.text()
- self.update_stt_text_signal.emit(detected_text)
- self.update_tts_text_signal.emit(detected_text)
- def speak(self, text):
- self.stt_text.clear()
- self.stream.feed(text)
- self.recorder.start()
- self.stream.play_async()
- def actual_update_stt_text(self, text):
- self.stt_text.setText(text)
- def actual_update_tts_text(self, text):
- self.tts_text.setText(text)
- def closeEvent(self, event):
- if self.recorder:
- self.recorder.shutdown()
- app = QApplication(sys.argv)
- window = SimpleApp()
- window.show()
- sys.exit(app.exec_())
|