# server.py (5.7 KB)
  # When True, recording only begins after a client sends the
  # "start-recording" command; when False it begins as soon as a client is connected.
  WAIT_FOR_START_COMMAND = False

  if __name__ == '__main__':
      # Address the websocket server listens on.
      server = "localhost"
      port = 5025

      # Startup banner, emitted as a single write (blank line included).
      print("\n".join((
          "STT speech to text server",
          f"runs on http://{server}:{port}",
          "",
          "starting",
      )))
      # Progress marker; a later "\r└─ OK" print overwrites this line.
      print("└─ ... ", end='', flush=True)
  10. from RealtimeSTT import AudioToTextRecorder
  11. from colorama import Fore, Back, Style
  12. import websockets
  13. import threading
  14. import colorama
  15. import asyncio
  16. import shutil
  17. import queue
  18. import json
  19. import time
  20. import os
  colorama.init()

  # State shared between the websocket coroutines and the worker threads.
  connected_clients = set()                      # websockets of the currently connected clients
  message_queue = queue.Queue()                  # outgoing messages, drained by send_handler
  start_recording_event = threading.Event()      # set when a client sends "start-recording"
  start_transcription_event = threading.Event()  # set once recorded audio is ready to transcribe

  # Console / realtime display bookkeeping.
  displayed_text = ""    # last realtime text that was printed and queued
  full_sentences = []
  first_chunk = True
  def clear_console():
      """Clear the terminal by running the platform's clear command."""
      # POSIX shells use `clear`; everything else is treated as Windows (`cls`).
      command = 'clear' if os.name == 'posix' else 'cls'
      os.system(command)
  async def handler(websocket, path=None):
      """Serve a single websocket client until its connection ends.

      Registers the client in ``connected_clients``, listens for JSON
      messages and sets ``start_recording_event`` when a
      ``{"type": "command", "content": "start-recording"}`` message arrives.
      The client is always unregistered when the connection ends.

      Args:
          websocket: The client connection; iterated for incoming messages.
          path: Request path. Unused here; optional so the handler can be
              called with or without it (NOTE(review): newer ``websockets``
              releases are expected to pass only the connection - confirm
              against the installed version).
      """
      print("\r└─ OK")
      if WAIT_FOR_START_COMMAND:
          print("waiting for start command")
          print("└─ ... ", end='', flush=True)

      connected_clients.add(websocket)

      try:
          # No outer `while True`: once the iterator finishes the connection
          # is closed, and restarting the iteration would only loop on a
          # dead connection and keep the cleanup below from ever running.
          async for message in websocket:
              try:
                  data = json.loads(message)
              except json.JSONDecodeError:
                  # A malformed message is reported but does not cost the
                  # client its connection.
                  print(Fore.RED + "STT Received an invalid JSON message." + Style.RESET_ALL)
                  continue

              # Valid JSON that is not an object (list, number, ...) carries
              # no command; ignore it instead of failing on `.get`.
              if not isinstance(data, dict):
                  continue

              if data.get("type") == "command" and data.get("content") == "start-recording":
                  print("\r└─ OK")
                  start_recording_event.set()
      except websockets.ConnectionClosedError:
          print(Fore.RED + "connection closed unexpectedly by the client" + Style.RESET_ALL)
      except websockets.exceptions.ConnectionClosedOK:
          print("connection closed.")
      finally:
          print("client disconnected")
          # discard() cannot raise if the client is already gone from the set.
          connected_clients.discard(websocket)
          print("waiting for clients")
          print("└─ ... ", end='', flush=True)
  def add_message_to_queue(type: str, content):
      """Put a ``{"type": ..., "content": ...}`` message on ``message_queue``.

      Args:
          type: Message kind, stored under the ``"type"`` key.
          content: Payload, stored under the ``"content"`` key.
      """
      message_queue.put(dict(type=type, content=content))
  def fill_cli_line(text):
      """Return ``text`` fitted to exactly one terminal line.

      Shorter text is right-padded with spaces to the terminal width; longer
      text keeps only its last ``width`` characters.
      """
      width = shutil.get_terminal_size().columns
      padded = text.ljust(width)
      return padded[-width:]
  def text_detected(text):
      """Handle a realtime transcription update.

      When ``text`` differs from the last displayed text, remember it, queue
      it as a ``"realtime"`` message and redraw the current console line.
      """
      global displayed_text, first_chunk

      if text == displayed_text:
          return  # unchanged: nothing to queue or redraw

      displayed_text = text
      first_chunk = False
      add_message_to_queue("realtime", text)

      padded = fill_cli_line(text)
      # Drop three characters to make room for the "└─ " prefix.
      line = "└─ " + Fore.CYAN + padded[:-3] + Style.RESET_ALL
      print(f"\r{line}", end='', flush=True)
  async def broadcast(message_obj):
      """Send ``message_obj``, serialized as JSON, to every connected client.

      Does nothing when no client is connected. A client whose connection
      has closed is skipped so the remaining clients still get the message.

      Args:
          message_obj: JSON-serializable object to send.
      """
      if not connected_clients:
          return

      # Serialize once rather than once per client.
      payload = json.dumps(message_obj)

      # Iterate over a snapshot: clients can connect or disconnect while a
      # send is awaited, and mutating a set during iteration raises
      # RuntimeError.
      for client in list(connected_clients):
          try:
              await client.send(payload)
          except websockets.exceptions.ConnectionClosed:
              # The client went away mid-broadcast; its handler removes it
              # from connected_clients.
              continue
  async def send_handler():
      """Forever drain ``message_queue`` and broadcast each message.

      The queue is polled: after it has been emptied the coroutine sleeps
      for 20 ms before checking again.
      """
      poll_interval = 0.02
      while True:
          while message_queue.qsize() > 0:
              outgoing = message_queue.get()
              await broadcast(outgoing)
          await asyncio.sleep(poll_interval)
  def recording_started():
      """Queue an empty ``record_start`` message (``on_recording_start`` callback)."""
      add_message_to_queue(type="record_start", content="")
  def vad_detect_started():
      """Queue an empty ``vad_start`` message (``on_vad_detect_start`` callback)."""
      add_message_to_queue(type="vad_start", content="")
  def wakeword_detect_started():
      """Queue an empty ``wakeword_start`` message (``on_wakeword_detection_start`` callback)."""
      add_message_to_queue(type="wakeword_start", content="")
  def transcription_started():
      """Queue an empty ``transcript_start`` message (``on_transcription_start`` callback)."""
      add_message_to_queue(type="transcript_start", content="")
  # Keyword arguments handed to AudioToTextRecorder below.
  # NOTE(review): the meaning of each tuning value is defined by RealtimeSTT;
  # check its documentation before changing any of them.
  recorder_config = dict(
      spinner=False,
      model='small.en',
      language='en',
      silero_sensitivity=0.01,
      webrtc_sensitivity=3,
      silero_use_onnx=False,
      post_speech_silence_duration=1.2,
      min_length_of_recording=0.2,
      min_gap_between_recordings=0,
      enable_realtime_transcription=True,
      realtime_processing_pause=0,
      realtime_model_type='tiny.en',
      # Callbacks defined in this file; each one queues a message for the clients.
      on_realtime_transcription_stabilized=text_detected,
      on_recording_start=recording_started,
      on_vad_detect_start=vad_detect_started,
      on_wakeword_detection_start=wakeword_detect_started,
      on_transcription_start=transcription_started,
  )

  recorder = AudioToTextRecorder(**recorder_config)
  def transcriber_thread():
      """Worker loop: transcribe each finished recording and queue the result.

      Blocks on ``start_transcription_event``, prints a status line, calls
      ``recorder.transcribe()``, prints the sentence, queues it as a
      ``"full"`` message and clears the event again.
      """
      while True:
          start_transcription_event.wait()

          status = fill_cli_line("└─ transcribing ... ")
          print(f"\r{status}", end='', flush=True)

          sentence = recorder.transcribe()
          print("".join((
              Style.RESET_ALL, "\r└─ ", Fore.YELLOW, sentence, Style.RESET_ALL,
          )))

          add_message_to_queue("full", sentence)
          start_transcription_event.clear()

          if not WAIT_FOR_START_COMMAND:
              continue
          print("waiting for start command")
          print("└─ ... ", end='', flush=True)
  def recorder_thread():
      """Worker loop: wait for audio and hand it to the transcriber.

      Idles (polling every 0.1 s) while no client is connected. Otherwise it
      optionally waits for the start command, calls ``recorder.wait_audio()``,
      then sets ``start_transcription_event`` and clears
      ``start_recording_event``.
      """
      global first_chunk

      while True:
          if not connected_clients:
              # Nobody is listening yet; check again shortly.
              time.sleep(0.1)
              continue

          first_chunk = True

          if WAIT_FOR_START_COMMAND:
              start_recording_event.wait()

          print("waiting for sentence")
          print("└─ ... ", end='', flush=True)

          recorder.wait_audio()

          start_transcription_event.set()
          start_recording_event.clear()
  # Background workers; daemon threads so they do not keep the process alive.
  threading.Thread(target=recorder_thread, daemon=True).start()
  threading.Thread(target=transcriber_thread, daemon=True).start()

  print("\r└─ OK")
  print("waiting for clients")
  print("└─ ... ", end='', flush=True)

  async def _run_server():
      """Run the websocket server and the outgoing-message pump until cancelled."""
      # The server is created inside the running loop, which avoids the
      # deprecated asyncio.get_event_loop() call made with no loop running.
      async with websockets.serve(handler, server, port):
          # Keep a reference so the task is not garbage-collected while running.
          sender_task = asyncio.create_task(send_handler())
          # Wait on a future that is never resolved: serve until interrupted.
          await asyncio.Future()

  asyncio.run(_run_server())