# server.py (3.5 KB)

  1. if __name__ == '__main__':
  2. print("Starting server, please wait...")
  3. from RealtimeSTT import AudioToTextRecorder
  4. import asyncio
  5. import websockets
  6. import threading
  7. import numpy as np
  8. from scipy.signal import resample
  9. import json
  # Shared module state.

  # The AudioToTextRecorder instance; stays None until recorder_thread() has
  # finished constructing it.
  recorder = None

  # Set by recorder_thread() right after the recorder is constructed; the
  # websocket handler and the startup code check/wait on it before using
  # `recorder`.
  recorder_ready = threading.Event()

  # The websocket assigned by echo() for the most recent connection, or None
  # when no client has connected yet. Only this one client receives output.
  client_websocket = None
  async def send_to_client(message):
      """Send ``message`` to the current client websocket, if there is one.

      Delivery is best-effort: when no client is registered, or the
      registered connection has already closed, the message is dropped
      instead of raising into the caller.

      Args:
          message: Payload handed unchanged to the websocket's ``send()``.
      """
      # Take a snapshot so the check and the send use the same object even if
      # the global is rebound while this coroutine is suspended.
      websocket = client_websocket
      if not websocket:
          return
      try:
          await websocket.send(message)
      except websockets.ConnectionClosed:
          # The client went away. Transcription must keep running, so the
          # message is intentionally discarded rather than propagated.
          pass
  def text_detected(text):
      """Forward a realtime transcription update to the client and echo it.

      Registered in ``recorder_config`` as the
      ``on_realtime_transcription_stabilized`` callback. The text is wrapped
      in a JSON message of type ``'realtime'`` and sent via
      ``send_to_client``; it is then printed on the current console line.

      Args:
          text: The transcription text supplied by the recorder callback.
      """
      payload = json.dumps({
          'type': 'realtime',
          'text': text
      })
      # This is a plain (non-async) callback, so a private event loop drives
      # the send. Close it afterwards: an unclosed loop keeps its selector and
      # self-pipe file descriptors alive, which leaks on every callback.
      loop = asyncio.new_event_loop()
      try:
          loop.run_until_complete(send_to_client(payload))
      finally:
          loop.close()
      print(f"\r{text}", flush=True, end='')
  # Keyword arguments for AudioToTextRecorder; expanded with
  # ``AudioToTextRecorder(**recorder_config)`` in recorder_thread().
  # NOTE(review): the meaning and units of the tuning values below are defined
  # by RealtimeSTT, not by this file; confirm against its documentation
  # before changing them.
  recorder_config = {
      'spinner': False,
      # Audio is not captured locally; the websocket handler pushes chunks in
      # through recorder.feed_audio().
      'use_microphone': False,
      'model': 'large-v2',
      'language': 'en',
      'silero_sensitivity': 0.4,
      'webrtc_sensitivity': 2,
      'post_speech_silence_duration': 0.7,
      'min_length_of_recording': 0,
      'min_gap_between_recordings': 0,
      'enable_realtime_transcription': True,
      'realtime_processing_pause': 0,
      'realtime_model_type': 'tiny.en',
      # Callback that relays realtime text to the connected client.
      'on_realtime_transcription_stabilized': text_detected,
  }
  def recorder_thread():
      """Create the recorder, then relay every finished sentence forever.

      Intended to run as the target of a worker thread. It builds the global
      ``recorder`` from ``recorder_config``, signals ``recorder_ready``, and
      then loops: each value returned by ``recorder.text()`` is sent to the
      client as a JSON message of type ``'fullSentence'`` and printed.

      The loop has no exit condition of its own; it ends only if an exception
      escapes.
      """
      global recorder
      print("Initializing RealtimeSTT...")
      recorder = AudioToTextRecorder(**recorder_config)
      print("RealtimeSTT initialized")
      recorder_ready.set()

      # One private event loop for the lifetime of this thread. Creating a new
      # loop per sentence without closing it leaks its selector and self-pipe
      # descriptors.
      loop = asyncio.new_event_loop()
      try:
          while True:
              full_sentence = recorder.text()
              loop.run_until_complete(
                  send_to_client(
                      json.dumps({
                          'type': 'fullSentence',
                          'text': full_sentence
                      })
                  )
              )
              print(f"\rSentence: {full_sentence}")
      finally:
          loop.close()
  def decode_and_resample(
          audio_data,
          original_sample_rate,
          target_sample_rate):
      """Resample raw 16-bit PCM audio to a different sample rate.

      The buffer is interpreted as a flat sequence of native-endian int16
      samples and resampled as a single channel with ``scipy.signal.resample``.

      Args:
          audio_data: Bytes-like buffer of int16 samples.
          original_sample_rate: Sample rate of ``audio_data``; must be
              non-zero when it differs from ``target_sample_rate``.
          target_sample_rate: Desired sample rate of the result.

      Returns:
          bytes: The resampled int16 samples. ``b''`` is returned when the
          input is empty or too short to yield a single output sample.

      Raises:
          ValueError: If ``len(audio_data)`` is not a multiple of 2 (raised by
              ``np.frombuffer``).
      """
      if len(audio_data) == 0:
          return b''

      # Decode 16-bit PCM data to numpy array
      audio_np = np.frombuffer(audio_data, dtype=np.int16)

      # Nothing to do when the rates already match; skipping the FFT round
      # trip keeps the samples bit-exact.
      if original_sample_rate == target_sample_rate:
          return audio_np.tobytes()

      # Calculate the number of samples after resampling
      num_target_samples = int(len(audio_np) * target_sample_rate /
                               original_sample_rate)
      if num_target_samples <= 0:
          return b''

      # Resample the audio
      resampled_audio = resample(audio_np, num_target_samples)

      # The resampler returns floats that can overshoot the int16 range near
      # sharp edges. Round to the nearest integer and saturate, because a bare
      # astype(int16) truncates toward zero and wraps out-of-range values to
      # the opposite sign.
      limits = np.iinfo(np.int16)
      saturated = np.clip(np.rint(resampled_audio), limits.min, limits.max)
      return saturated.astype(np.int16).tobytes()
  async def echo(websocket, path=None):
      """Websocket handler: receive audio messages and feed the recorder.

      Each binary message is expected to be laid out as::

          [4-byte little-endian metadata length][UTF-8 JSON metadata][PCM bytes]

      where the metadata object carries a ``'sampleRate'`` entry. The PCM
      payload is resampled to 16000 Hz and passed to ``recorder.feed_audio``.

      The connection is registered as the global ``client_websocket`` for its
      lifetime, so transcription results are sent back to it.

      Args:
          websocket: The connection object supplied by the websockets server.
          path: Request path. Unused; optional because some websockets
              versions call handlers with the connection only.
      """
      global client_websocket
      print("Client connected")
      client_websocket = websocket
      try:
          async for message in websocket:
              if not recorder_ready.is_set():
                  print("Recorder not ready")
                  continue

              # Text frames arrive as str and cannot carry the binary layout.
              if isinstance(message, str):
                  print("Ignoring non-binary message")
                  continue

              try:
                  metadata_length = int.from_bytes(message[:4],
                                                   byteorder='little')
                  metadata_json = message[4:4+metadata_length].decode('utf-8')
                  metadata = json.loads(metadata_json)
                  sample_rate = metadata['sampleRate']
              except (ValueError, KeyError, TypeError):
                  # Bad UTF-8, bad JSON, or a missing/ill-typed sampleRate:
                  # drop this message but keep the connection alive.
                  print("Ignoring malformed audio message")
                  continue

              chunk = message[4+metadata_length:]
              resampled_chunk = decode_and_resample(chunk, sample_rate, 16000)
              recorder.feed_audio(resampled_chunk)
      finally:
          # Stop routing output to a closed socket, unless a newer connection
          # has already taken over the slot.
          if client_websocket is websocket:
              client_websocket = None
  async def main():
      """Serve websocket clients on 0.0.0.0:9001 until cancelled."""
      # Create the server inside the running loop instead of relying on the
      # deprecated implicit loop from asyncio.get_event_loop().
      async with websockets.serve(echo, "0.0.0.0", 9001):
          await asyncio.Future()  # never completes: run forever

  # Use a distinct name so the recorder_thread function is not shadowed by the
  # Thread object. daemon=True because the worker loops forever; a non-daemon
  # thread would keep the process alive after Ctrl+C.
  # NOTE(review): confirm RealtimeSTT needs no explicit shutdown on exit.
  recorder_worker = threading.Thread(target=recorder_thread, daemon=True)
  recorder_worker.start()

  # Block until the recorder is constructed so that audio arriving on the
  # first connection is not discarded.
  recorder_ready.wait()

  print("Server started. Press Ctrl+C to stop the server.")
  asyncio.run(main())