Prechádzať zdrojové kódy

engine switchable in example app + abort with esc

Kolja Beigel pred 1 rokom
rodič
commit
c4867c9962
1 zmenený súbor, 84 pridaní a 24 odobraní
  1. 84 24
      example_app/VoiceApp.py

+ 84 - 24
example_app/VoiceApp.py

@@ -2,8 +2,8 @@ from RealtimeTTS import TextToAudioStream, AzureEngine, ElevenlabsEngine, System
 from RealtimeSTT import AudioToTextRecorder
 from RealtimeSTT import AudioToTextRecorder
 
 
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
 from PyQt5.QtCore import Qt, QTimer, QRect, QEvent, pyqtSignal, QThread, QPoint, QPropertyAnimation, QVariantAnimation
-from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont
-from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget
+from PyQt5.QtGui import QPalette, QColor, QPainter, QFontMetrics, QFont, QMouseEvent, QContextMenuEvent
+from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QDesktopWidget, QMenu, QAction
 
 
 import os
 import os
 import openai
 import openai
@@ -12,16 +12,17 @@ import time
 import sounddevice as sd
 import sounddevice as sd
 import numpy as np
 import numpy as np
 import wavio
 import wavio
-
+import keyboard
 
 
 max_history_messages = 6
 max_history_messages = 6
 return_to_wakewords_after_silence = 12
 return_to_wakewords_after_silence = 12
 start_with_wakeword = False
 start_with_wakeword = False
 recorder_model = "large-v2"
 recorder_model = "large-v2"
 language = "de"
 language = "de"
+engine = "azure" # elevenlabs, system
+azure_speech_region = "germanywestcentral"
 
 
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 openai.api_key = os.environ.get("OPENAI_API_KEY")
-azure_speech_region = "germanywestcentral"
 
 
 user_font_size = 22
 user_font_size = 22
 user_color = QColor(208, 208, 208) # gray
 user_color = QColor(208, 208, 208) # gray
@@ -131,27 +132,37 @@ class TransparentWindow(QWidget):
         self.run_fade_user = False
         self.run_fade_user = False
         self.run_fade_assistant = False
         self.run_fade_assistant = False
 
 
-    def init(self):
-        self.stream = TextToAudioStream(
-                # SystemEngine(),
+        self.menu = QMenu()
+        self.menu.setStyleSheet("""
+            QMenu {
+                background-color: black;
+                color: white;
+                border-radius: 10px;
+            }
+            QMenu::item:selected {
+                background-color: #555555;
+            }
+            """)
+
+        self.elevenlabs_action = QAction("Elevenlabs", self)
+        self.azure_action = QAction("Azure", self)
+        self.system_action = QAction("System", self)
+
+        self.menu.addAction(self.elevenlabs_action)
+        self.menu.addAction(self.azure_action)
+        self.menu.addAction(self.system_action)
+
+        self.elevenlabs_action.triggered.connect(lambda: self.select_engine("elevenlabs"))
+        self.azure_action.triggered.connect(lambda: self.select_engine("azure"))
+        self.system_action.triggered.connect(lambda: self.select_engine("system"))
+
+    def mousePressEvent(self, event: QMouseEvent):
+        if event.button() == Qt.LeftButton:
+            if event.pos().x() >= self.width() - 100 and event.pos().y() <= 100:
+                self.menu.exec_(self.mapToGlobal(event.pos()))        
 
 
-                AzureEngine(
-                    os.environ.get("AZURE_SPEECH_KEY"),
-                    azure_speech_region,
-                    voice,
-                    rate=34,
-                    pitch=10,
-                ),
-
-                # ElevenlabsEngine(
-                #     os.environ.get("ELEVENLABS_API_KEY")
-                # ),
-                on_character=self.on_character,
-                on_text_stream_stop=self.on_text_stream_stop,
-                on_text_stream_start=self.on_text_stream_start,
-                on_audio_stream_stop=self.on_audio_stream_stop,
-                log_characters=True,
-            )       
+    def init(self):
+        self.select_engine("azure")
         self.recorder = AudioToTextRecorder(
         self.recorder = AudioToTextRecorder(
             model=recorder_model,
             model=recorder_model,
             language=language,
             language=language,
@@ -172,6 +183,49 @@ class TransparentWindow(QWidget):
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.start()
         self.text_retrieval_thread.activate()
         self.text_retrieval_thread.activate()
 
 
+        keyboard.on_press_key('esc', self.on_escape)
+
+    def select_engine(self, engine_name):
+        if self.stream:
+            if self.stream.is_playing():
+                self.stream.stop()
+            self.stream = None
+
+        engine = None
+
+        if engine_name == "azure":
+            engine = AzureEngine(
+                    os.environ.get("AZURE_SPEECH_KEY"),
+                    azure_speech_region,
+                    voice,
+                    rate=34,
+                    pitch=10,
+                )
+
+        elif engine_name == "elevenlabs":
+            engine = ElevenlabsEngine(
+                    os.environ.get("ELEVENLABS_API_KEY")
+                )
+        else:
+            engine = SystemEngine(
+                voice="Stefan",
+                print_installed_voices=True
+            )
+
+        self.stream = TextToAudioStream(
+            engine,
+            on_character=self.on_character,
+            on_text_stream_stop=self.on_text_stream_stop,
+            on_text_stream_start=self.on_text_stream_start,
+            on_audio_stream_stop=self.on_audio_stream_stop,
+            log_characters=True
+        )
+
+
+    def on_escape(self, e):
+        if self.stream.is_playing():
+            self.stream.stop()
+
     def showEvent(self, event: QEvent):
     def showEvent(self, event: QEvent):
         super().showEvent(event)
         super().showEvent(event)
         if event.type() == QEvent.Show:
         if event.type() == QEvent.Show:
@@ -312,6 +366,12 @@ class TransparentWindow(QWidget):
         self.run_fade_assistant = True
         self.run_fade_assistant = True
         self.fade_out_assistant_text()
         self.fade_out_assistant_text()
 
 
+    # def keyPressEvent(self, event):
+    #     if event.key() == Qt.Key_Escape:
+    #         self.stream.stop()
+    #     super().keyPressEvent(event)
+
+
     def update_self(self):
     def update_self(self):
 
 
         self.blockSignals(True)
         self.blockSignals(True)