Parcourir la source

engine switchable in example app + abort with esc

Kolja Beigel il y a 1 an
Parent
commit
c4867c9962
1 fichier modifié avec 84 ajouts et 24 suppressions
  1. 84 24
      example_app/VoiceApp.py

+ 84 - 24
example_app/VoiceApp.py

@@ -2,8 +2,8 @@ from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, System
 from RealtimeSTT import AudioToTextRecorder
 
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
-from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont
-from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget
+from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
+from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
 
 import os
 import openai
@@ -12,16 +12,17 @@ import time
 import sounddevice as sd
 import numpy as np
 import wavio
-
+import keyboard
 
 max_history_messages = 6
 return_to_wakewords_after_silence = 12
 start_with_wakeword = False
 recorder_model = "large-v2"
 language = "de"
+engine = "azure" # elevenlabs, system
+azure_speech_region = "germanywestcentral"
 
 openai.api_key = os.environ.get("OPENAI_API_KEY")
-azure_speech_region = "germanywestcentral"
 
 user_font_size = 22
 user_color = QColor(208, 208, 208) # gray
@@ -131,27 +132,37 @@ class TransparentWindow(QWidget):
         self.run_fade_user = False
         self.run_fade_assistant = False
 
-    def init(self):
-        self.stream = TextToAudioStream(
-                # SystemEngine(),
+        self.menu = QMenu()
+        self.menu.setStyleSheet("""
+            QMenu {
+                background-color: black;
+                color: white;
+                border-radius: 10px;
+            }
+            QMenu::item:selected {
+                background-color: #555555;
+            }
+            """)
+
+        self.elevenlabs_action = QAction("Elevenlabs", self)
+        self.azure_action = QAction("Azure", self)
+        self.system_action = QAction("System", self)
+
+        self.menu.addAction(self.elevenlabs_action)
+        self.menu.addAction(self.azure_action)
+        self.menu.addAction(self.system_action)
+
+        self.elevenlabs_action.triggered.connect(lambda: self.select_engine("elevenlabs"))
+        self.azure_action.triggered.connect(lambda: self.select_engine("azure"))
+        self.system_action.triggered.connect(lambda: self.select_engine("system"))
+
+    def mousePressEvent(self, event: QMouseEvent):
+        """Open the engine menu on a left click in the top-right corner.
+
+        The hot zone is x >= width - 100 and y <= 100 in widget coordinates;
+        the menu is shown at the click position mapped to global coordinates.
+        """
+        if event.button() != Qt.LeftButton:
+            return
+
+        click_pos = event.pos()
+        in_hot_zone = click_pos.x() >= self.width() - 100 and click_pos.y() <= 100
+        if in_hot_zone:
+            self.menu.exec_(self.mapToGlobal(click_pos))
 
-                AzureEngine(
-                    os.environ.get("AZURE_SPEECH_KEY"),
-                    azure_speech_region,
-                    voice,
-                    rate=34,
-                    pitch=10,
-                ),
-
-                # ElevenlabsEngine(
-                #     os.environ.get("ELEVENLABS_API_KEY")
-                # ),
-                on_character=self.on_character,
-                on_text_stream_stop=self.on_text_stream_stop,
-                on_text_stream_start=self.on_text_stream_start,
-                on_audio_stream_stop=self.on_audio_stream_stop,
-                log_characters=True,
-            )       
+    def init(self):
+        self.select_engine("azure")
         self.recorder = AudioToTextRecorder(
             model=recorder_model,
             language=language,
@@ -172,6 +183,49 @@ class TransparentWindow(QWidget):
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.activate()
 
+        keyboard.on_press_key('esc', self.on_escape)
+
+    def select_engine(self, engine_name):
+        """Replace the active text-to-audio stream with one using a new engine.
+
+        A stream that is already set is stopped (if it is playing) and
+        dropped before the replacement is built.
+
+        Args:
+            engine_name: "azure" or "elevenlabs"; any other value selects
+                the system engine.
+        """
+        # The first call comes from init(), possibly before self.stream has
+        # ever been assigned, so do not assume the attribute exists.
+        current_stream = getattr(self, "stream", None)
+        if current_stream and current_stream.is_playing():
+            current_stream.stop()
+        self.stream = None
+
+        # Named tts_engine so it does not shadow the module-level `engine`
+        # setting.
+        if engine_name == "azure":
+            tts_engine = AzureEngine(
+                    os.environ.get("AZURE_SPEECH_KEY"),
+                    azure_speech_region,
+                    voice,
+                    rate=34,
+                    pitch=10,
+                )
+        elif engine_name == "elevenlabs":
+            tts_engine = ElevenlabsEngine(
+                    os.environ.get("ELEVENLABS_API_KEY")
+                )
+        else:
+            # Unrecognised names fall back to the system engine.
+            tts_engine = SystemEngine(
+                voice="Stefan",
+                print_installed_voices=True
+            )
+
+        self.stream = TextToAudioStream(
+            tts_engine,
+            on_character=self.on_character,
+            on_text_stream_stop=self.on_text_stream_stop,
+            on_text_stream_start=self.on_text_stream_start,
+            on_audio_stream_stop=self.on_audio_stream_stop,
+            log_characters=True
+        )
+
+
+    def on_escape(self, e):
+        """Stop playback of the current stream when the Escape hook fires.
+
+        Args:
+            e: Event object passed by the keyboard hook (unused).
+        """
+        # select_engine() sets self.stream to None while it swaps engines,
+        # so take a single snapshot and check it before calling into it.
+        stream = getattr(self, "stream", None)
+        if stream is not None and stream.is_playing():
+            stream.stop()
+
     def showEvent(self, event: QEvent):
         super().showEvent(event)
         if event.type() == QEvent.Show:
@@ -312,6 +366,12 @@ class TransparentWindow(QWidget):
         self.run_fade_assistant = True
         self.fade_out_assistant_text()
 
+    # def keyPressEvent(self, event):
+    #     if event.key() == Qt.Key_Escape:
+    #         self.stream.stop()
+    #     super().keyPressEvent(event)
+
+
     def update_self(self):
 
         self.blockSignals(True)