浏览代码

engine switchable in example app + abort with esc

Kolja Beigel 2 年之前
父节点
当前提交
c4867c9962
共有 1 个文件被更改,包括 84 次插入 和 24 次删除
  1. 84 24
      example_app/VoiceApp.py

+ 84 - 24
example_app/VoiceApp.py

@@ -2,8 +2,8 @@ from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, System
 from RealtimeSTT import AudioToTextRecorder
 from RealtimeSTT import AudioToTextRecorder
 
 
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
-from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont
-from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget
+from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
+from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
 
 
 import os
 import os
 import openai
 import openai
@@ -12,16 +12,17 @@ import time
 import sounddevice as sd
 import sounddevice as sd
 import numpy as np
 import numpy as np
 import wavio
 import wavio
-
+import keyboard
 
 
 max_history_messages = 6
 max_history_messages = 6
 return_to_wakewords_after_silence = 12
 return_to_wakewords_after_silence = 12
 start_with_wakeword = False
 start_with_wakeword = False
 recorder_model = "large-v2"
 recorder_model = "large-v2"
 language = "de"
 language = "de"
+engine = "azure" # elevenlabs, system
+azure_speech_region = "germanywestcentral"
 
 
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 openai.api_key = os.environ.get("OPENAI_API_KEY")
-azure_speech_region = "germanywestcentral"
 
 
 user_font_size = 22
 user_font_size = 22
 user_color = QColor(208, 208, 208) # gray
 user_color = QColor(208, 208, 208) # gray
@@ -131,27 +132,37 @@ class TransparentWindow(QWidget):
         self.run_fade_user = False
         self.run_fade_user = False
         self.run_fade_assistant = False
         self.run_fade_assistant = False
 
 
-    def init(self):
-        self.stream = TextToAudioStream(
-                # SystemEngine(),
+        self.menu = QMenu()
+        self.menu.setStyleSheet("""
+            QMenu {
+                background-color: black;
+                color: white;
+                border-radius: 10px;
+            }
+            QMenu::item:selected {
+                background-color: #555555;
+            }
+            """)
+
+        self.elevenlabs_action = QAction("Elevenlabs", self)
+        self.azure_action = QAction("Azure", self)
+        self.system_action = QAction("System", self)
+
+        self.menu.addAction(self.elevenlabs_action)
+        self.menu.addAction(self.azure_action)
+        self.menu.addAction(self.system_action)
+
+        self.elevenlabs_action.triggered.connect(lambda: self.select_engine("elevenlabs"))
+        self.azure_action.triggered.connect(lambda: self.select_engine("azure"))
+        self.system_action.triggered.connect(lambda: self.select_engine("system"))
+
+    def mousePressEvent(self, event: QMouseEvent):
+        if event.button() == Qt.LeftButton:
+            if event.pos().x() >= self.width() - 100 and event.pos().y() <= 100:
+                self.menu.exec_(self.mapToGlobal(event.pos()))        
 
 
-                AzureEngine(
-                    os.environ.get("AZURE_SPEECH_KEY"),
-                    azure_speech_region,
-                    voice,
-                    rate=34,
-                    pitch=10,
-                ),
-
-                # ElevenlabsEngine(
-                #     os.environ.get("ELEVENLABS_API_KEY")
-                # ),
-                on_character=self.on_character,
-                on_text_stream_stop=self.on_text_stream_stop,
-                on_text_stream_start=self.on_text_stream_start,
-                on_audio_stream_stop=self.on_audio_stream_stop,
-                log_characters=True,
-            )       
+    def init(self):
+        self.select_engine("azure")
         self.recorder = AudioToTextRecorder(
         self.recorder = AudioToTextRecorder(
             model=recorder_model,
             model=recorder_model,
             language=language,
             language=language,
@@ -172,6 +183,49 @@ class TransparentWindow(QWidget):
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.activate()
         self.text_retrieval_thread.activate()
 
 
+        keyboard.on_press_key('esc', self.on_escape)
+
+    def select_engine(self, engine_name):
+        if self.stream:
+            if self.stream.is_playing():
+                self.stream.stop()
+            self.stream = None
+
+        engine = None
+
+        if engine_name == "azure":
+            engine = AzureEngine(
+                    os.environ.get("AZURE_SPEECH_KEY"),
+                    azure_speech_region,
+                    voice,
+                    rate=34,
+                    pitch=10,
+                )
+
+        elif engine_name == "elevenlabs":
+            engine = ElevenlabsEngine(
+                    os.environ.get("ELEVENLABS_API_KEY")
+                )
+        else:
+            engine = SystemEngine(
+                voice="Stefan",
+                print_installed_voices=True
+            )
+
+        self.stream = TextToAudioStream(
+            engine,
+            on_character=self.on_character,
+            on_text_stream_stop=self.on_text_stream_stop,
+            on_text_stream_start=self.on_text_stream_start,
+            on_audio_stream_stop=self.on_audio_stream_stop,
+            log_characters=True
+        )
+
+
+    def on_escape(self, e):
+        if self.stream.is_playing():
+            self.stream.stop()
+
     def showEvent(self, event: QEvent):
     def showEvent(self, event: QEvent):
         super().showEvent(event)
         super().showEvent(event)
         if event.type() == QEvent.Show:
         if event.type() == QEvent.Show:
@@ -312,6 +366,12 @@ class TransparentWindow(QWidget):
         self.run_fade_assistant = True
         self.run_fade_assistant = True
         self.fade_out_assistant_text()
         self.fade_out_assistant_text()
 
 
+    # def keyPressEvent(self, event):
+    #     if event.key() == Qt.Key_Escape:
+    #         self.stream.stop()
+    #     super().keyPressEvent(event)
+
+
     def update_self(self):
     def update_self(self):
 
 
         self.blockSignals(True)
         self.blockSignals(True)