KoljaB 6 months ago
parent
commit
1f959e0bfc
2 files changed, 71 insertions and 27 deletions
  1. +33 −9
      RealtimeSTT/audio_recorder_client.py
  2. +38 −18
      server/stt_server.py

+ 33 - 9
RealtimeSTT/audio_recorder_client.py

@@ -1,7 +1,9 @@
 log_outgoing_chunks = False
+debug_mode = False
 
 from typing import Iterable, List, Optional, Union
 from urllib.parse import urlparse
+from datetime import datetime
 import subprocess
 import websocket
 import threading
@@ -44,6 +46,18 @@ INIT_HANDLE_BUFFER_OVERFLOW = False
 if platform.system() != 'Darwin':
     INIT_HANDLE_BUFFER_OVERFLOW = True
 
+# Define ANSI color codes for terminal output
+class bcolors:
+    HEADER = '\033[95m'   # Magenta
+    OKBLUE = '\033[94m'   # Blue
+    OKCYAN = '\033[96m'   # Cyan
+    OKGREEN = '\033[92m'  # Green
+    WARNING = '\033[93m'  # Yellow
+    FAIL = '\033[91m'     # Red
+    ENDC = '\033[0m'      # Reset to default
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
 class AudioToTextRecorderClient:
     """
     A class responsible for capturing audio from the microphone, detecting
@@ -241,10 +255,10 @@ class AudioToTextRecorderClient:
                 if self.final_text_ready.wait(timeout=wait_interval):
                     break  # Break if transcription is ready
                 
-                if not self.realtime_text == self.submitted_realtime_text:
-                    if self.on_realtime_transcription_update:
-                        self.on_realtime_transcription_update(self.realtime_text)
-                    self.submitted_realtime_text = self.realtime_text
+                # if not self.realtime_text == self.submitted_realtime_text:
+                #     if self.on_realtime_transcription_update:
+                #         self.on_realtime_transcription_update(self.realtime_text)
+                #     self.submitted_realtime_text = self.realtime_text
 
                 total_wait_time += wait_interval
                 
@@ -287,8 +301,6 @@ class AudioToTextRecorderClient:
         Set the microphone on or off.
         """
         self.muted = not microphone_on
-        #self.call_method("set_microphone", [microphone_on])
-        # self.use_microphone.value = microphone_on
 
     def abort(self):
         self.call_method("abort")
@@ -372,8 +384,6 @@ class AudioToTextRecorderClient:
             args += ['--beam_size', str(self.beam_size)]
         if self.beam_size_realtime is not None:
             args += ['--beam_size_realtime', str(self.beam_size_realtime)]
-        if self.initial_prompt:
-            args += ['--initial_prompt', self.initial_prompt]
         if self.wake_words is not None:
             args += ['--wake_words', str(self.wake_words)]
         if self.wake_words_sensitivity is not None:
@@ -403,11 +413,15 @@ class AudioToTextRecorderClient:
             parsed_data_url = urlparse(self.data_url)
             if parsed_data_url.port:
                 args += ['--data_port', str(parsed_data_url.port)]
+        if self.initial_prompt:
+            sanitized_prompt = self.initial_prompt.replace("\n", "\\n")
+            args += ['--initial_prompt', sanitized_prompt]
 
         # Start the subprocess with the mapped arguments
         if os.name == 'nt':  # Windows
             cmd = 'start /min cmd /c ' + subprocess.list2cmdline(args)
-            # print(f"Opening server with cli command: {cmd}")
+            if debug_mode:
+                print(f"Opening server with cli command: {cmd}")
             subprocess.Popen(cmd, shell=True)
         else:  # Unix-like systems
             subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True)
@@ -570,6 +584,16 @@ class AudioToTextRecorderClient:
                 if data['text'] != self.realtime_text:
                     self.realtime_text = data['text']
 
+                    timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+                    print(f"Realtime text [{timestamp}]: {bcolors.OKCYAN}{self.realtime_text}{bcolors.ENDC}")
+
+                    if self.on_realtime_transcription_update:
+                        # Call the callback in a new thread to avoid blocking
+                        threading.Thread(
+                            target=self.on_realtime_transcription_update,
+                            args=(self.realtime_text,)
+                        ).start()
+
             # Handle full sentences
             elif data.get('type') == 'fullSentence':
                 self.final_text = data['text']
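
The client now escapes newlines in initial_prompt before handing it to the server process on the command line, and the server (second file below) reverses the substitution after argument parsing. A minimal sketch of that round-trip, with illustrative helper names that are not part of RealtimeSTT:

    # Sketch only: escape_prompt/unescape_prompt are hypothetical helpers.
    def escape_prompt(prompt: str) -> str:
        # Client side: encode real newlines so the prompt survives as a single CLI argument.
        return prompt.replace("\n", "\\n")

    def unescape_prompt(prompt: str) -> str:
        # Server side: restore the newlines after argparse has parsed the value.
        return prompt.replace("\\n", "\n")

    original = "First line.\nSecond line."
    assert unescape_prompt(escape_prompt(original)) == original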

+ 38 - 18
server/stt_server.py

@@ -69,8 +69,11 @@ The server will broadcast real-time transcription updates to all connected clien
 extended_logging = True
 send_recorded_chunk = False
 log_incoming_chunks = False
+stt_optimizations = False
 
 
+from .install_packages import check_and_install_packages
+from datetime import datetime
 import asyncio
 import base64
 import sys
@@ -78,7 +81,6 @@ import sys
 if sys.platform == 'win32':
     asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
 
-from .install_packages import check_and_install_packages
 
 check_and_install_packages([
     {
@@ -177,13 +179,14 @@ def text_detected(text, loop):
 
     text = preprocess_text(text)
 
-    sentence_end_marks = ['.', '!', '?', '。'] 
-    if text.endswith("..."):
-        recorder.post_speech_silence_duration = global_args.mid_sentence_detection_pause
-    elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
-        recorder.post_speech_silence_duration = global_args.end_of_sentence_detection_pause
-    else:
-        recorder.post_speech_silence_duration = global_args.unknown_sentence_detection_pause
+    if stt_optimizations:
+        sentence_end_marks = ['.', '!', '?', '。'] 
+        if text.endswith("..."):
+            recorder.post_speech_silence_duration = global_args.mid_sentence_detection_pause
+        elif text and text[-1] in sentence_end_marks and prev_text and prev_text[-1] in sentence_end_marks:
+            recorder.post_speech_silence_duration = global_args.end_of_sentence_detection_pause
+        else:
+            recorder.post_speech_silence_duration = global_args.unknown_sentence_detection_pause
 
     prev_text = text
 
@@ -193,10 +196,14 @@ def text_detected(text, loop):
         'text': text
     })
     asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
+
+    # Get current timestamp in HH:MM:SS.nnn format
+    timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+
     if extended_logging:
-        print(f"Realtime text: {bcolors.OKCYAN}{text}{bcolors.ENDC}\n", flush=True, end="")
+        print(f"  [{timestamp}] Realtime text: {bcolors.OKCYAN}{text}{bcolors.ENDC}\n", flush=True, end="")
     else:
-        print(f"\r{bcolors.OKCYAN}{text}{bcolors.ENDC}", flush=True, end='')
+        print(f"\r[{timestamp}] {bcolors.OKCYAN}{text}{bcolors.ENDC}", flush=True, end='')
 
 def on_recording_start(loop):
     # Send a message to the client indicating recording has started
@@ -348,7 +355,7 @@ def parse_arguments():
     parser.add_argument('--wake_word_timeout', type=float, default=5.0,
                         help='Maximum time in seconds that the system will wait for a wake word before timing out. After this timeout, the system stops listening for wake words until reactivated. Default is 5.0 seconds.')
 
-    parser.add_argument('--wake_word_activation_delay', type=float, default=0.5,
+    parser.add_argument('--wake_word_activation_delay', type=float, default=20,
                         help='The delay in seconds before the wake word detection is activated after the system starts listening. This prevents false positives during the start of a session. Default is 0.5 seconds.')
 
     parser.add_argument('--wakeword_backend', type=str, default='pvporcupine',
@@ -369,8 +376,14 @@ def parse_arguments():
     parser.add_argument('--use_extended_logging', action='store_true',
                         help='Writes extensive log messages for the recording worker, that processes the audio chunks.')
 
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Replace escaped newlines with actual newlines in initial_prompt
+    if args.initial_prompt:
+        args.initial_prompt = args.initial_prompt.replace("\\n", "\n")
 
-    return parser.parse_args()
+    return args
 
 def _recorder_thread(loop):
     global recorder, prev_text, stop_recorder
@@ -390,10 +403,12 @@ def _recorder_thread(loop):
         # Use the passed event loop here
         asyncio.run_coroutine_threadsafe(audio_queue.put(message), loop)
 
+        timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+
         if extended_logging:
-            print(f"Full text: {bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}")
+            print(f"  [{timestamp}] Full text: {bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}\n", flush=True, end="")
         else:
-            print(f"\r{bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}\n")
+            print(f"\r[{timestamp}] {bcolors.BOLD}Sentence:{bcolors.ENDC} {bcolors.OKGREEN}{full_sentence}{bcolors.ENDC}\n")
     try:
         while not stop_recorder:
             recorder.text(process_text)
@@ -445,8 +460,9 @@ async def control_handler(websocket, path):
                                 value_formatted = f"{value:.2f}"
                             else:
                                 value_formatted = value
+                            timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
                             if extended_logging:
-                                print(f"{bcolors.OKGREEN}Set recorder.{parameter} to: {bcolors.OKBLUE}{value_formatted}{bcolors.ENDC}")
+                                print(f"  [{timestamp}] {bcolors.OKGREEN}Set recorder.{parameter} to: {bcolors.OKBLUE}{value_formatted}{bcolors.ENDC}")
                             # Optionally send a response back to the client
                             await websocket.send(json.dumps({"status": "success", "message": f"Parameter {parameter} set to {value}"}))
                         else:
@@ -469,8 +485,9 @@ async def control_handler(websocket, path):
 
                             value_truncated = value_formatted[:39] + "…" if len(value_formatted) > 40 else value_formatted
 
+                            timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
                             if extended_logging:
-                                print(f"{bcolors.OKGREEN}Get recorder.{parameter}: {bcolors.OKBLUE}{value_truncated}{bcolors.ENDC}")
+                                print(f"  [{timestamp}] {bcolors.OKGREEN}Get recorder.{parameter}: {bcolors.OKBLUE}{value_truncated}{bcolors.ENDC}")
                             response = {"status": "success", "parameter": parameter, "value": value}
                             if request_id is not None:
                                 response["request_id"] = request_id
@@ -490,7 +507,8 @@ async def control_handler(websocket, path):
                                 args = command_data.get("args", [])
                                 kwargs = command_data.get("kwargs", {})
                                 method(*args, **kwargs)
-                                print(f"{bcolors.OKGREEN}Called method recorder.{bcolors.OKBLUE}{method_name}{bcolors.ENDC}")
+                                timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+                                print(f"  [{timestamp}] {bcolors.OKGREEN}Called method recorder.{bcolors.OKBLUE}{method_name}{bcolors.ENDC}")
                                 await websocket.send(json.dumps({"status": "success", "message": f"Method {method_name} called"}))
                             else:
                                 print(f"{bcolors.WARNING}Recorder does not have method {method_name}{bcolors.ENDC}")
@@ -541,8 +559,10 @@ async def broadcast_audio_messages():
         message = await audio_queue.get()
         for conn in list(data_connections):
             try:
+                timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
+
                 if extended_logging:
-                    print(f"    {bcolors.OKBLUE}Sending message: {message}{bcolors.ENDC}\n", flush=True, end="")
+                    print(f"  [{timestamp}] Sending message: {bcolors.OKBLUE}{message}{bcolors.ENDC}\n", flush=True, end="")
                 await conn.send(message)
             except websockets.exceptions.ConnectionClosed:
                 data_connections.remove(conn)
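
The same timestamp expression recurs throughout the stt_server.py changes: it formats the current time as HH:MM:SS.mmm by trimming the six-digit microsecond field down to milliseconds. A small standalone sketch of that pattern (the helper name is illustrative, not part of the codebase):

    from datetime import datetime

    def timestamp_ms() -> str:
        # strftime('%f') yields six microsecond digits; dropping the last three
        # leaves a millisecond-precision HH:MM:SS.mmm string.
        return datetime.now().strftime('%H:%M:%S.%f')[:-3]

    print(f"[{timestamp_ms()}] example log line")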