123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
"""WebSocket speech-to-text server built on RealtimeSTT.

Wire protocol
-------------
Client -> server (one binary frame per audio chunk)::

    [4-byte little-endian metadata length][UTF-8 JSON metadata][PCM audio]

The JSON metadata must contain ``sampleRate``; the remaining bytes are
decoded as 16-bit PCM samples and resampled to ``TARGET_SAMPLE_RATE``
before being fed to the recorder.

Server -> client (JSON text)::

    {"type": "realtime", "text": ...}       # stabilized partial transcript
    {"type": "fullSentence", "text": ...}   # result of recorder.text()

Only the most recently connected client receives transcripts.
"""
import asyncio
import json
import threading

import numpy as np
from scipy.signal import resample

SERVER_HOST = "localhost"
SERVER_PORT = 9001
# Sample rate the audio is converted to before recorder.feed_audio().
# NOTE(review): presumably the rate RealtimeSTT expects - confirm.
TARGET_SAMPLE_RATE = 16000

recorder = None                      # AudioToTextRecorder, set by recorder_thread()
recorder_ready = threading.Event()   # set once `recorder` is usable
client_websocket = None              # connection that receives transcripts
server_loop = None                   # event loop that owns the websocket server


async def send_to_client(message):
    """Send *message* to the connected client, if there is one.

    Must run on the server's event loop; worker threads go through
    ``_publish`` instead of awaiting this directly.
    """
    websocket = client_websocket
    if websocket is None:
        return
    # Local import keeps the module importable without websockets installed.
    from websockets.exceptions import ConnectionClosed
    try:
        await websocket.send(message)
    except ConnectionClosed:
        # The client left between transcription and delivery; echo() clears
        # the stale reference, so there is nothing more to do here.
        pass


def _report_send_failure(future):
    """Print any unexpected error raised while delivering a message."""
    if future.cancelled():
        return
    error = future.exception()
    if error is not None:
        print(f"\nFailed to send to client: {error!r}")


def _publish(message_type, text):
    """Queue a transcript message on the server loop from a worker thread.

    The websocket belongs to the server's event loop, and asyncio objects
    are not thread-safe, so the send is scheduled on that loop rather than
    run on a throwaway loop in the calling thread.
    """
    loop = server_loop
    if loop is None or not loop.is_running():
        return  # server not up yet (or already stopped): nobody to notify
    payload = json.dumps({
        'type': message_type,
        'text': text
    })
    future = asyncio.run_coroutine_threadsafe(send_to_client(payload), loop)
    future.add_done_callback(_report_send_failure)


def text_detected(text):
    """RealtimeSTT callback: forward a stabilized realtime transcript."""
    _publish('realtime', text)
    print(f"\r{text}", flush=True, end='')


recorder_config = {
    'spinner': False,
    # Audio is supplied through recorder.feed_audio() by the websocket
    # handler rather than captured locally.
    'use_microphone': False,
    'model': 'large-v3',
    'language': 'en',
    'silero_sensitivity': 0.4,
    'webrtc_sensitivity': 2,
    'post_speech_silence_duration': 0.7,
    'min_length_of_recording': 0,
    'min_gap_between_recordings': 0,
    'enable_realtime_transcription': True,
    'realtime_processing_pause': 0,
    'realtime_model_type': 'tiny.en',
    'on_realtime_transcription_stabilized': text_detected,
}


def recorder_thread():
    """Create the recorder, signal readiness, then publish sentences forever."""
    global recorder
    # main() has already imported the package on the main thread, so this
    # only binds the name.
    from RealtimeSTT import AudioToTextRecorder

    print("Initializing RealtimeSTT...")
    recorder = AudioToTextRecorder(**recorder_config)
    print("RealtimeSTT initialized")
    recorder_ready.set()
    while True:
        full_sentence = recorder.text()
        _publish('fullSentence', full_sentence)
        print(f"\rSentence: {full_sentence}")


def decode_and_resample(
        audio_data,
        original_sample_rate,
        target_sample_rate):
    """Convert 16-bit PCM bytes from one sample rate to another.

    Args:
        audio_data: Raw bytes holding native-endian int16 samples.
        original_sample_rate: Sample rate of ``audio_data``.
        target_sample_rate: Sample rate of the returned audio.

    Returns:
        The resampled audio as int16 bytes; ``b''`` when the input is empty
        or too short to yield a single output sample.

    Raises:
        ValueError: If a sample rate is not positive, or ``audio_data`` is
            not a whole number of 16-bit samples.
    """
    if original_sample_rate <= 0 or target_sample_rate <= 0:
        raise ValueError("sample rates must be positive")

    # Decode 16-bit PCM data to numpy array
    audio_np = np.frombuffer(audio_data, dtype=np.int16)
    if audio_np.size == 0:
        return b''
    if original_sample_rate == target_sample_rate:
        return audio_np.tobytes()  # nothing to do; avoid an FFT round trip

    # Calculate the number of samples after resampling
    num_target_samples = int(audio_np.size * target_sample_rate /
                             original_sample_rate)
    if num_target_samples == 0:
        return b''

    # Resample the audio
    resampled_audio = resample(audio_np, num_target_samples)
    # Fourier resampling overshoots near sharp edges; clip so loud samples
    # saturate instead of wrapping around to the opposite sign.
    int16_range = np.iinfo(np.int16)
    clipped = np.clip(resampled_audio, int16_range.min, int16_range.max)
    return clipped.astype(np.int16).tobytes()


def _split_frame(message):
    """Split a client frame into ``(sample_rate, pcm_bytes)``.

    Raises TypeError for a text frame, ValueError for a truncated frame or
    invalid JSON/UTF-8, and KeyError when ``sampleRate`` is missing.
    """
    if isinstance(message, str):
        raise TypeError("expected a binary frame, got a text frame")
    if len(message) < 4:
        raise ValueError("frame is shorter than the 4-byte length prefix")
    metadata_length = int.from_bytes(message[:4], byteorder='little')
    audio_start = 4 + metadata_length
    if audio_start > len(message):
        raise ValueError("metadata length exceeds the frame size")
    metadata = json.loads(message[4:audio_start].decode('utf-8'))
    return metadata['sampleRate'], message[audio_start:]


async def echo(websocket, path=None):
    """Handle one client: feed its audio frames to the recorder.

    ``path`` is optional because newer websockets releases call the handler
    with the connection only; older ones still pass it.
    """
    global client_websocket
    print("Client connected")
    client_websocket = websocket
    try:
        async for message in websocket:
            if not recorder_ready.is_set():
                print("Recorder not ready")
                continue
            try:
                sample_rate, chunk = _split_frame(message)
                resampled_chunk = decode_and_resample(
                    chunk, sample_rate, TARGET_SAMPLE_RATE)
            except (ValueError, KeyError, TypeError) as error:
                # One bad frame should not tear down the whole connection.
                print(f"Ignoring malformed audio frame: {error!r}")
                continue
            recorder.feed_audio(resampled_chunk)
    finally:
        # Do not keep sending to a closed socket, but leave a newer client
        # that connected in the meantime untouched.
        if client_websocket is websocket:
            client_websocket = None
        print("Client disconnected")


async def _serve():
    """Run the websocket server until the surrounding task is cancelled."""
    global server_loop
    import websockets

    server_loop = asyncio.get_running_loop()
    async with websockets.serve(echo, SERVER_HOST, SERVER_PORT):
        print("Server started. Press Ctrl+C to stop the server.")
        await asyncio.Future()  # never completes; Ctrl+C cancels it


def main():
    """Start the recorder thread, wait until it is ready, then serve."""
    print("Starting server, please wait...")
    # Imported after the banner, as in the original script, so the user gets
    # feedback before the import runs.
    import RealtimeSTT  # noqa: F401

    # Daemon thread: its loop never returns, so a normal thread would keep
    # the process alive after Ctrl+C.
    worker = threading.Thread(target=recorder_thread, daemon=True)
    worker.start()
    # Poll rather than wait forever so a failed initialization is reported
    # instead of hanging the process.
    while not recorder_ready.wait(timeout=0.5):
        if not worker.is_alive():
            raise SystemExit("RealtimeSTT failed to initialize.")

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        print("\nServer stopped.")


if __name__ == '__main__':
    main()
|