Ver código fonte

changes to tests

Kolja Beigel 1 ano atrás
pai
commit
1aad06cb26

+ 15 - 14
RealtimeSTT/audio_recorder.py

@@ -239,7 +239,6 @@ class AudioToTextRecorder:
         # If not yet started to record, wait for voice activity to initiate recording.
         if not self.is_recording and len(self.frames) == 0:
             self._set_state("listening")
-            self._set_spinner("speak now")
             self.start_recording_on_voice_activity = True
 
             while not self.is_recording:
@@ -264,10 +263,7 @@ class AudioToTextRecorder:
             self.recording_stop_time = 0
             self.listen_start = 0
 
-            if self.spinner and self.halo:
-                self.halo.stop()
-                self.halo = None
-                self._set_state("inactive")
+            self._set_state("inactive")
 
             return transcription
         
@@ -300,9 +296,7 @@ class AudioToTextRecorder:
         self.frames = []
         self.is_recording = True        
         self.recording_start_time = time.time()
-        self._set_spinner("recording")
         self._set_state("recording")
-        if self.halo: self.halo._interval = 100
 
         if self.on_recording_start:
             self.on_recording_start()
@@ -324,7 +318,6 @@ class AudioToTextRecorder:
         self.is_recording = False
         self.recording_stop_time = time.time()
 
-        self._set_spinner("transcribing")
         self._set_state("transcribing")
 
         if self.on_recording_stop:
@@ -447,6 +440,7 @@ class AudioToTextRecorder:
         return False  # Voice is not active    
 
 
+
     def _set_state(self, new_state):
         """
         Update the current state of the recorder and execute corresponding state-change callbacks.
@@ -477,12 +471,25 @@ class AudioToTextRecorder:
         if new_state == "listening":
             if self.on_vad_detect_start:
                 self.on_vad_detect_start()
+            self._set_spinner("speak now")
+            self.halo._interval = 250
         elif new_state == "wakeword":
             if self.on_wakeword_detection_start:
                 self.on_wakeword_detection_start()
+            self._set_spinner(f"say {self.wake_words}")
+            self.halo._interval = 500
         elif new_state == "transcribing":
             if self.on_transcription_start:
                 self.on_transcription_start()
+            self._set_spinner("transcribing")
+            self.halo._interval = 50
+        elif new_state == "recording":
+            self._set_spinner("recording")
+            self.halo._interval = 100
+        elif new_state == "inactive":
+            if self.spinner and self.halo:
+                self.halo.stop()
+                self.halo = None
 
 
     def _set_spinner(self, text):
@@ -542,17 +549,11 @@ class AudioToTextRecorder:
                 if not self.recording_stop_time:
                     if self.wake_words and wake_word_activation_delay_passed and not self.wakeword_detected:
                         self._set_state("wakeword")
-                        if self.spinner and self.halo:
-                            self.halo.text = f"say {self.wake_words}"
-                            self.halo._interval = 500
                     else:
                         if self.listen_start:
                             self._set_state("listening")
                         else:
                             self._set_state("inactive")
-                        if self.spinner and self.halo:
-                            self.halo.text = "speak now"
-                            self.halo._interval = 200
 
                 # Detect wake words if applicable
                 if self.wake_words and wake_word_activation_delay_passed:

+ 2 - 2
tests/advanced_talk.py

@@ -104,7 +104,7 @@ system_prompt = {
 # start talk  ##########################################################################################################
 
 engine.set_voice(voice)
-stream = TextToAudioStream(engine)
+stream = TextToAudioStream(engine, log_characters=True)
 history = []
 
 def generate(messages):
@@ -131,7 +131,7 @@ while True:
     generator = generate([system_prompt] + history[-10:])
     stream.feed(generator)
 
-    stream.play_async(log_characters=True)
+    stream.play_async()
     while stream.is_playing():
         if keyboard.is_pressed('space'):
             stream.stop()

+ 2 - 2
tests/minimalistic_talkbot.py

@@ -3,7 +3,7 @@ import openai, os
 
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 character_prompt = 'Answer precise and short with the polite sarcasm of a butler.'
-stream = RealtimeTTS.TextToAudioStream(RealtimeTTS.AzureEngine(os.environ.get("AZURE_SPEECH_KEY"), "eastus"))
+stream = RealtimeTTS.TextToAudioStream(RealtimeTTS.AzureEngine(os.environ.get("AZURE_SPEECH_KEY"), "eastus"), log_characters=True)
 recorder = RealtimeSTT.AudioToTextRecorder(model="medium")
 
 def generate(messages):
@@ -16,5 +16,5 @@ while True:
     print(f'>>> {(user_text := recorder.text())}\n<<< ', end="", flush=True)
     history.append({'role': 'user', 'content': user_text})
     assistant_response = generate([{ 'role': 'system',  'content': character_prompt}] + history[-10:])
-    stream.feed(assistant_response).play(log_characters=True)
+    stream.feed(assistant_response).play()
     history.append({'role': 'assistant', 'content': stream.text()})

+ 3 - 2
tests/openai_voice_interface.py

@@ -13,7 +13,8 @@ stream = TextToAudioStream(
     AzureEngine(
         os.environ.get("AZURE_SPEECH_KEY"),
         "eastus",
-    )
+    ),
+    log_characters=True
 )
 
 # Speech-to-Text Recorder Setup
@@ -53,7 +54,7 @@ def main():
 
         # Get assistant response and play it
         assistant_response = generate_response([system_prompt_message] + history[-10:])
-        stream.feed(assistant_response).play(log_characters=True)
+        stream.feed(assistant_response).play()
 
         history.append({'role': 'assistant', 'content': stream.text()})
 

+ 2 - 2
tests/translator.py

@@ -11,7 +11,7 @@ engine = AzureEngine(
     os.environ.get("AZURE_SPEECH_KEY"),
     "eastus"
 )
-stream = TextToAudioStream(engine)
+stream = TextToAudioStream(engine, log_characters=True)
 
 # Speech-to-Text Recorder Setup
 recorder = AudioToTextRecorder(
@@ -65,7 +65,7 @@ def main():
         translation_stream = generate_response([system_prompt_message, user_message])
         print("Translation: ", end="", flush=True)
         stream.feed(translation_stream)
-        stream.play(log_characters=True)
+        stream.play()
 
 if __name__ == "__main__":
     main()