소스 검색

engine switchable in example app + abort with esc

Kolja Beigel 1 년 전
부모
커밋
c4867c9962
1개의 변경된 파일, 84개의 추가 및 24개의 삭제
  1. 84 24
      example_app/VoiceApp.py

+ 84 - 24
example_app/VoiceApp.py

@@ -2,8 +2,8 @@ from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, System
 from RealtimeSTT import AudioToTextRecorder
 from RealtimeSTT import AudioToTextRecorder
 
 
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
-from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont
-from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget
+from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
+from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
 
 
 import os
 import os
 import openai
 import openai
@@ -12,16 +12,17 @@ import time
 import sounddevice as sd
 import sounddevice as sd
 import numpy as np
 import numpy as np
 import wavio
 import wavio
-
+import keyboard
 
 
 max_history_messages = 6
 max_history_messages = 6
 return_to_wakewords_after_silence = 12
 return_to_wakewords_after_silence = 12
 start_with_wakeword = False
 start_with_wakeword = False
 recorder_model = "large-v2"
 recorder_model = "large-v2"
 language = "de"
 language = "de"
+engine = "azure" # elevenlabs, system
+azure_speech_region = "germanywestcentral"
 
 
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 openai.api_key = os.environ.get("OPENAI_API_KEY")
-azure_speech_region = "germanywestcentral"
 
 
 user_font_size = 22
 user_font_size = 22
 user_color = QColor(208, 208, 208) # gray
 user_color = QColor(208, 208, 208) # gray
@@ -131,27 +132,37 @@ class TransparentWindow(QWidget):
         self.run_fade_user = False
         self.run_fade_user = False
         self.run_fade_assistant = False
         self.run_fade_assistant = False
 
 
-    def init(self):
-        self.stream = TextToAudioStream(
-                # SystemEngine(),
+        self.menu = QMenu()
+        self.menu.setStyleSheet("""
+            QMenu {
+                background-color: black;
+                color: white;
+                border-radius: 10px;
+            }
+            QMenu::item:selected {
+                background-color: #555555;
+            }
+            """)
+
+        self.elevenlabs_action = QAction("Elevenlabs", self)
+        self.azure_action = QAction("Azure", self)
+        self.system_action = QAction("System", self)
+
+        self.menu.addAction(self.elevenlabs_action)
+        self.menu.addAction(self.azure_action)
+        self.menu.addAction(self.system_action)
+
+        self.elevenlabs_action.triggered.connect(lambda: self.select_engine("elevenlabs"))
+        self.azure_action.triggered.connect(lambda: self.select_engine("azure"))
+        self.system_action.triggered.connect(lambda: self.select_engine("system"))
+
+    def mousePressEvent(self, event: QMouseEvent):
+        """Open the engine menu on a left click in the top-right corner.
+
+        Only left-button clicks within 100 pixels of the right edge and
+        100 pixels of the top edge of the window open the menu; every
+        other click does nothing here.
+        """
+        if event.button() != Qt.LeftButton:
+            return
+
+        click_pos = event.pos()
+        near_right_edge = click_pos.x() >= self.width() - 100
+        near_top_edge = click_pos.y() <= 100
+        if near_right_edge and near_top_edge:
+            # The menu is positioned in screen coordinates.
+            self.menu.exec_(self.mapToGlobal(click_pos))
 
 
-                AzureEngine(
-                    os.environ.get("AZURE_SPEECH_KEY"),
-                    azure_speech_region,
-                    voice,
-                    rate=34,
-                    pitch=10,
-                ),
-
-                # ElevenlabsEngine(
-                #     os.environ.get("ELEVENLABS_API_KEY")
-                # ),
-                on_character=self.on_character,
-                on_text_stream_stop=self.on_text_stream_stop,
-                on_text_stream_start=self.on_text_stream_start,
-                on_audio_stream_stop=self.on_audio_stream_stop,
-                log_characters=True,
-            )       
+    def init(self):
+        self.select_engine("azure")
         self.recorder = AudioToTextRecorder(
         self.recorder = AudioToTextRecorder(
             model=recorder_model,
             model=recorder_model,
             language=language,
             language=language,
@@ -172,6 +183,49 @@ class TransparentWindow(QWidget):
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.activate()
         self.text_retrieval_thread.activate()
 
 
+        keyboard.on_press_key('esc', self.on_escape)
+
+    def select_engine(self, engine_name):
+        """Replace the active TTS stream with one backed by the chosen engine.
+
+        Playback on the current stream (if any) is stopped first. The new
+        stream is fully constructed before it is assigned to ``self.stream``,
+        so the attribute never passes through ``None`` while the escape
+        hotkey handler may be reading it.
+
+        Args:
+            engine_name: "azure" or "elevenlabs"; any other value selects
+                the system engine.
+        """
+        # getattr: tolerate the first call, when no stream may exist yet.
+        current_stream = getattr(self, "stream", None)
+        if current_stream is not None and current_stream.is_playing():
+            current_stream.stop()
+
+        # Named tts_engine so the module-level `engine` setting is not shadowed.
+        if engine_name == "azure":
+            tts_engine = AzureEngine(
+                os.environ.get("AZURE_SPEECH_KEY"),
+                azure_speech_region,
+                voice,
+                rate=34,
+                pitch=10,
+            )
+        elif engine_name == "elevenlabs":
+            tts_engine = ElevenlabsEngine(
+                os.environ.get("ELEVENLABS_API_KEY")
+            )
+        else:
+            tts_engine = SystemEngine(
+                voice="Stefan",
+                print_installed_voices=True,
+            )
+
+        self.stream = TextToAudioStream(
+            tts_engine,
+            on_character=self.on_character,
+            on_text_stream_stop=self.on_text_stream_stop,
+            on_text_stream_start=self.on_text_stream_start,
+            on_audio_stream_stop=self.on_audio_stream_stop,
+            log_characters=True,
+        )
+
+
+    def on_escape(self, e):
+        """Stop TTS playback when the escape key is pressed.
+
+        Registered through ``keyboard.on_press_key('esc', ...)``; ``e`` is
+        the key event handed over by that hook and is not used.
+        """
+        # Take a snapshot: self.stream may be missing or None while engines
+        # are being swapped, and this hook can fire at any moment.
+        stream = getattr(self, "stream", None)
+        if stream is not None and stream.is_playing():
+            stream.stop()
+
     def showEvent(self, event: QEvent):
     def showEvent(self, event: QEvent):
         super().showEvent(event)
         super().showEvent(event)
         if event.type() == QEvent.Show:
         if event.type() == QEvent.Show:
@@ -312,6 +366,12 @@ class TransparentWindow(QWidget):
         self.run_fade_assistant = True
         self.run_fade_assistant = True
         self.fade_out_assistant_text()
         self.fade_out_assistant_text()
 
 
+    # def keyPressEvent(self, event):
+    #     if event.key() == Qt.Key_Escape:
+    #         self.stream.stop()
+    #     super().keyPressEvent(event)
+
+
     def update_self(self):
     def update_self(self):
 
 
         self.blockSignals(True)
         self.blockSignals(True)