|
@@ -0,0 +1,109 @@
|
|
|
|
+if __name__ == '__main__':
|
|
|
|
+ print("Starting server, please wait...")
|
|
|
|
+
|
|
|
|
+ from RealtimeSTT import AudioToTextRecorder
|
|
|
|
+ import asyncio
|
|
|
|
+ import websockets
|
|
|
|
+ import threading
|
|
|
|
+ import numpy as np
|
|
|
|
+ from scipy.signal import resample
|
|
|
|
+ import json
|
|
|
|
+
|
|
|
|
+ recorder = None
|
|
|
|
+ recorder_ready = threading.Event()
|
|
|
|
+ client_websocket = None
|
|
|
|
+
|
|
|
|
+ async def send_to_client(message):
|
|
|
|
+ if client_websocket:
|
|
|
|
+ await client_websocket.send(message)
|
|
|
|
+
|
|
|
|
+ def text_detected(text):
|
|
|
|
+ asyncio.new_event_loop().run_until_complete(
|
|
|
|
+ send_to_client(
|
|
|
|
+ json.dumps({
|
|
|
|
+ 'type': 'realtime',
|
|
|
|
+ 'text': text
|
|
|
|
+ })
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ print(f"{text}", flush=True, end='')
|
|
|
|
+
|
|
|
|
+ recorder_config = {
|
|
|
|
+ 'spinner': False,
|
|
|
|
+ 'use_microphone': False,
|
|
|
|
+ 'model': 'large-v3',
|
|
|
|
+ 'language': 'en',
|
|
|
|
+ 'silero_sensitivity': 0.4,
|
|
|
|
+ 'webrtc_sensitivity': 2,
|
|
|
|
+ 'post_speech_silence_duration': 1.0,
|
|
|
|
+ 'min_length_of_recording': 0,
|
|
|
|
+ 'min_gap_between_recordings': 0,
|
|
|
|
+ 'enable_realtime_transcription': True,
|
|
|
|
+ 'realtime_processing_pause': 0,
|
|
|
|
+ 'realtime_model_type': 'tiny.en',
|
|
|
|
+ 'on_realtime_transcription_stabilized': text_detected,
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ def recorder_thread():
|
|
|
|
+ global recorder
|
|
|
|
+ print("Initializing RealtimeSTT...")
|
|
|
|
+ recorder = AudioToTextRecorder(**recorder_config)
|
|
|
|
+ print("RealtimeSTT initialized")
|
|
|
|
+ recorder_ready.set()
|
|
|
|
+ while True:
|
|
|
|
+ full_sentence = recorder.text()
|
|
|
|
+ asyncio.new_event_loop().run_until_complete(
|
|
|
|
+ send_to_client(
|
|
|
|
+ json.dumps({
|
|
|
|
+ 'type': 'fullSentence',
|
|
|
|
+ 'text': full_sentence
|
|
|
|
+ })
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ print(f"{full_sentence}")
|
|
|
|
+
|
|
|
|
+ def decode_and_resample(
|
|
|
|
+ audio_data,
|
|
|
|
+ original_sample_rate,
|
|
|
|
+ target_sample_rate):
|
|
|
|
+
|
|
|
|
+ # Decode 16-bit PCM data to numpy array
|
|
|
|
+ audio_np = np.frombuffer(audio_data, dtype=np.int16)
|
|
|
|
+
|
|
|
|
+ # Calculate the number of samples after resampling
|
|
|
|
+ num_original_samples = len(audio_np)
|
|
|
|
+ num_target_samples = int(num_original_samples * target_sample_rate /
|
|
|
|
+ original_sample_rate)
|
|
|
|
+
|
|
|
|
+ # Resample the audio
|
|
|
|
+ resampled_audio = resample(audio_np, num_target_samples)
|
|
|
|
+
|
|
|
|
+ return resampled_audio.astype(np.int16).tobytes()
|
|
|
|
+
|
|
|
|
+ async def echo(websocket, path):
|
|
|
|
+ print("Client connected")
|
|
|
|
+ global client_websocket
|
|
|
|
+ client_websocket = websocket
|
|
|
|
+ async for message in websocket:
|
|
|
|
+
|
|
|
|
+ if not recorder_ready.is_set():
|
|
|
|
+ print("Recorder not ready")
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ metadata_length = int.from_bytes(message[:4], byteorder='little')
|
|
|
|
+ metadata_json = message[4:4+metadata_length].decode('utf-8')
|
|
|
|
+ metadata = json.loads(metadata_json)
|
|
|
|
+ sample_rate = metadata['sampleRate']
|
|
|
|
+ chunk = message[4+metadata_length:]
|
|
|
|
+ resampled_chunk = decode_and_resample(chunk, sample_rate, 16000)
|
|
|
|
+ recorder.feed_audio(resampled_chunk)
|
|
|
|
+
|
|
|
|
+ start_server = websockets.serve(echo, "localhost", 9001)
|
|
|
|
+
|
|
|
|
+ recorder_thread = threading.Thread(target=recorder_thread)
|
|
|
|
+ recorder_thread.start()
|
|
|
|
+ recorder_ready.wait()
|
|
|
|
+
|
|
|
|
+ print("Server started. Press Ctrl+C to stop the server.")
|
|
|
|
+ asyncio.get_event_loop().run_until_complete(start_server)
|
|
|
|
+ asyncio.get_event_loop().run_forever()
|