# audio_recorder_client.py
  1. log_outgoing_chunks = False
  2. from typing import Iterable, List, Optional, Union
  3. from urllib.parse import urlparse
  4. import subprocess
  5. import websocket
  6. import threading
  7. import platform
  8. import logging
  9. import pyaudio
  10. import socket
  11. import struct
  12. import signal
  13. import json
  14. import time
  15. import sys
  16. import os
  17. DEFAULT_CONTROL_URL = "ws://127.0.0.1:8011"
  18. DEFAULT_DATA_URL = "ws://127.0.0.1:8012"
  19. INIT_MODEL_TRANSCRIPTION = "tiny"
  20. INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny"
  21. INIT_REALTIME_PROCESSING_PAUSE = 0.2
  22. INIT_SILERO_SENSITIVITY = 0.4
  23. INIT_WEBRTC_SENSITIVITY = 3
  24. INIT_POST_SPEECH_SILENCE_DURATION = 0.6
  25. INIT_MIN_LENGTH_OF_RECORDING = 0.5
  26. INIT_MIN_GAP_BETWEEN_RECORDINGS = 0
  27. INIT_WAKE_WORDS_SENSITIVITY = 0.6
  28. INIT_PRE_RECORDING_BUFFER_DURATION = 1.0
  29. INIT_WAKE_WORD_ACTIVATION_DELAY = 0.0
  30. INIT_WAKE_WORD_TIMEOUT = 5.0
  31. INIT_WAKE_WORD_BUFFER_DURATION = 0.1
  32. ALLOWED_LATENCY_LIMIT = 100
  33. CHUNK = 1024
  34. FORMAT = pyaudio.paInt16
  35. CHANNELS = 1
  36. SAMPLE_RATE = 16000
  37. BUFFER_SIZE = 512
  38. INIT_HANDLE_BUFFER_OVERFLOW = False
  39. if platform.system() != 'Darwin':
  40. INIT_HANDLE_BUFFER_OVERFLOW = True
  41. class AudioToTextRecorderClient:
  42. """
  43. A class responsible for capturing audio from the microphone, detecting
  44. voice activity, and then transcribing the captured audio using the
  45. `faster_whisper` model.
  46. """
  47. def __init__(self,
  48. model: str = INIT_MODEL_TRANSCRIPTION,
  49. language: str = "",
  50. compute_type: str = "default",
  51. input_device_index: int = None,
  52. gpu_device_index: Union[int, List[int]] = 0,
  53. device: str = "cuda",
  54. on_recording_start=None,
  55. on_recording_stop=None,
  56. on_transcription_start=None,
  57. ensure_sentence_starting_uppercase=True,
  58. ensure_sentence_ends_with_period=True,
  59. use_microphone=True,
  60. spinner=True,
  61. level=logging.WARNING,
  62. # Realtime transcription parameters
  63. enable_realtime_transcription=False,
  64. use_main_model_for_realtime=False,
  65. realtime_model_type=INIT_MODEL_TRANSCRIPTION_REALTIME,
  66. realtime_processing_pause=INIT_REALTIME_PROCESSING_PAUSE,
  67. on_realtime_transcription_update=None,
  68. on_realtime_transcription_stabilized=None,
  69. # Voice activation parameters
  70. silero_sensitivity: float = INIT_SILERO_SENSITIVITY,
  71. silero_use_onnx: bool = False,
  72. silero_deactivity_detection: bool = False,
  73. webrtc_sensitivity: int = INIT_WEBRTC_SENSITIVITY,
  74. post_speech_silence_duration: float = (
  75. INIT_POST_SPEECH_SILENCE_DURATION
  76. ),
  77. min_length_of_recording: float = (
  78. INIT_MIN_LENGTH_OF_RECORDING
  79. ),
  80. min_gap_between_recordings: float = (
  81. INIT_MIN_GAP_BETWEEN_RECORDINGS
  82. ),
  83. pre_recording_buffer_duration: float = (
  84. INIT_PRE_RECORDING_BUFFER_DURATION
  85. ),
  86. on_vad_detect_start=None,
  87. on_vad_detect_stop=None,
  88. # Wake word parameters
  89. wakeword_backend: str = "pvporcupine",
  90. openwakeword_model_paths: str = None,
  91. openwakeword_inference_framework: str = "onnx",
  92. wake_words: str = "",
  93. wake_words_sensitivity: float = INIT_WAKE_WORDS_SENSITIVITY,
  94. wake_word_activation_delay: float = (
  95. INIT_WAKE_WORD_ACTIVATION_DELAY
  96. ),
  97. wake_word_timeout: float = INIT_WAKE_WORD_TIMEOUT,
  98. wake_word_buffer_duration: float = INIT_WAKE_WORD_BUFFER_DURATION,
  99. on_wakeword_detected=None,
  100. on_wakeword_timeout=None,
  101. on_wakeword_detection_start=None,
  102. on_wakeword_detection_end=None,
  103. on_recorded_chunk=None,
  104. debug_mode=False,
  105. handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW,
  106. beam_size: int = 5,
  107. beam_size_realtime: int = 3,
  108. buffer_size: int = BUFFER_SIZE,
  109. sample_rate: int = SAMPLE_RATE,
  110. initial_prompt: Optional[Union[str, Iterable[int]]] = None,
  111. suppress_tokens: Optional[List[int]] = [-1],
  112. print_transcription_time: bool = False,
  113. early_transcription_on_silence: int = 0,
  114. allowed_latency_limit: int = ALLOWED_LATENCY_LIMIT,
  115. no_log_file: bool = False,
  116. use_extended_logging: bool = False,
  117. # Server urls
  118. control_url: str = DEFAULT_CONTROL_URL,
  119. data_url: str = DEFAULT_DATA_URL,
  120. autostart_server: bool = True,
  121. ):
  122. # Set instance variables from constructor parameters
  123. self.model = model
  124. self.language = language
  125. self.compute_type = compute_type
  126. self.input_device_index = input_device_index
  127. self.gpu_device_index = gpu_device_index
  128. self.device = device
  129. self.on_recording_start = on_recording_start
  130. self.on_recording_stop = on_recording_stop
  131. self.on_transcription_start = on_transcription_start
  132. self.ensure_sentence_starting_uppercase = ensure_sentence_starting_uppercase
  133. self.ensure_sentence_ends_with_period = ensure_sentence_ends_with_period
  134. self.use_microphone = use_microphone
  135. self.spinner = spinner
  136. self.level = level
  137. # Real-time transcription parameters
  138. self.enable_realtime_transcription = enable_realtime_transcription
  139. self.use_main_model_for_realtime = use_main_model_for_realtime
  140. self.realtime_model_type = realtime_model_type
  141. self.realtime_processing_pause = realtime_processing_pause
  142. self.on_realtime_transcription_update = on_realtime_transcription_update
  143. self.on_realtime_transcription_stabilized = on_realtime_transcription_stabilized
  144. # Voice activation parameters
  145. self.silero_sensitivity = silero_sensitivity
  146. self.silero_use_onnx = silero_use_onnx
  147. self.silero_deactivity_detection = silero_deactivity_detection
  148. self.webrtc_sensitivity = webrtc_sensitivity
  149. self.post_speech_silence_duration = post_speech_silence_duration
  150. self.min_length_of_recording = min_length_of_recording
  151. self.min_gap_between_recordings = min_gap_between_recordings
  152. self.pre_recording_buffer_duration = pre_recording_buffer_duration
  153. self.on_vad_detect_start = on_vad_detect_start
  154. self.on_vad_detect_stop = on_vad_detect_stop
  155. # Wake word parameters
  156. self.wakeword_backend = wakeword_backend
  157. self.openwakeword_model_paths = openwakeword_model_paths
  158. self.openwakeword_inference_framework = openwakeword_inference_framework
  159. self.wake_words = wake_words
  160. self.wake_words_sensitivity = wake_words_sensitivity
  161. self.wake_word_activation_delay = wake_word_activation_delay
  162. self.wake_word_timeout = wake_word_timeout
  163. self.wake_word_buffer_duration = wake_word_buffer_duration
  164. self.on_wakeword_detected = on_wakeword_detected
  165. self.on_wakeword_timeout = on_wakeword_timeout
  166. self.on_wakeword_detection_start = on_wakeword_detection_start
  167. self.on_wakeword_detection_end = on_wakeword_detection_end
  168. self.on_recorded_chunk = on_recorded_chunk
  169. self.debug_mode = debug_mode
  170. self.handle_buffer_overflow = handle_buffer_overflow
  171. self.beam_size = beam_size
  172. self.beam_size_realtime = beam_size_realtime
  173. self.buffer_size = buffer_size
  174. self.sample_rate = sample_rate
  175. self.initial_prompt = initial_prompt
  176. self.suppress_tokens = suppress_tokens
  177. self.print_transcription_time = print_transcription_time
  178. self.early_transcription_on_silence = early_transcription_on_silence
  179. self.allowed_latency_limit = allowed_latency_limit
  180. self.no_log_file = no_log_file
  181. self.use_extended_logging = use_extended_logging
  182. # Server URLs
  183. self.control_url = control_url
  184. self.data_url = data_url
  185. self.autostart_server = autostart_server
  186. # Instance variables
  187. self.muted = False
  188. self.recording_thread = None
  189. self.is_running = True
  190. self.connection_established = threading.Event()
  191. self.recording_start = threading.Event()
  192. self.final_text_ready = threading.Event()
  193. self.realtime_text = ""
  194. self.final_text = ""
  195. self.request_counter = 0
  196. self.pending_requests = {} # Map from request_id to threading.Event and value
  197. if self.debug_mode:
  198. print("Checking STT server")
  199. if not self.connect():
  200. print("Failed to connect to the server.", file=sys.stderr)
  201. else:
  202. if self.debug_mode:
  203. print("STT server is running and connected.")
  204. if self.use_microphone:
  205. self.start_recording()
  206. def text(self, on_transcription_finished=None):
  207. self.realtime_text = ""
  208. self.submitted_realtime_text = ""
  209. self.final_text = ""
  210. self.final_text_ready.clear()
  211. self.recording_start.set()
  212. try:
  213. total_wait_time = 0
  214. wait_interval = 0.02 # Wait in small intervals, e.g., 100ms
  215. max_wait_time = 60 # Timeout after 60 seconds
  216. while total_wait_time < max_wait_time:
  217. if self.final_text_ready.wait(timeout=wait_interval):
  218. break # Break if transcription is ready
  219. if not self.realtime_text == self.submitted_realtime_text:
  220. if self.on_realtime_transcription_update:
  221. self.on_realtime_transcription_update(self.realtime_text)
  222. self.submitted_realtime_text = self.realtime_text
  223. total_wait_time += wait_interval
  224. # Check if a manual interrupt has occurred
  225. if total_wait_time >= max_wait_time:
  226. if self.debug_mode:
  227. print("Timeout while waiting for text from the server.")
  228. self.recording_start.clear()
  229. if on_transcription_finished:
  230. threading.Thread(target=on_transcription_finished, args=("",)).start()
  231. return ""
  232. self.recording_start.clear()
  233. if on_transcription_finished:
  234. threading.Thread(target=on_transcription_finished, args=(self.final_text,)).start()
  235. return self.final_text
  236. except KeyboardInterrupt:
  237. if self.debug_mode:
  238. print("KeyboardInterrupt in record_and_send_audio, exiting...")
  239. raise KeyboardInterrupt
  240. except Exception as e:
  241. print(f"Error in AudioToTextRecorderClient.text(): {e}")
  242. return ""
  243. def feed_audio(self, chunk, original_sample_rate=16000):
  244. metadata = {"sampleRate": original_sample_rate}
  245. metadata_json = json.dumps(metadata)
  246. metadata_length = len(metadata_json)
  247. message = struct.pack('<I', metadata_length) + metadata_json.encode('utf-8') + chunk
  248. if self.is_running:
  249. self.data_ws.send(message, opcode=websocket.ABNF.OPCODE_BINARY)
  250. def set_microphone(self, microphone_on=True):
  251. """
  252. Set the microphone on or off.
  253. """
  254. self.muted = not microphone_on
  255. #self.call_method("set_microphone", [microphone_on])
  256. # self.use_microphone.value = microphone_on
  257. def abort(self):
  258. self.call_method("abort")
  259. def wakeup(self):
  260. self.call_method("wakeup")
  261. def clear_audio_queue(self):
  262. self.call_method("clear_audio_queue")
  263. def stop(self):
  264. self.call_method("stop")
  265. def connect(self):
  266. if not self.ensure_server_running():
  267. print("Cannot start STT server. Exiting.")
  268. return False
  269. try:
  270. # Connect to control WebSocket
  271. self.control_ws = websocket.WebSocketApp(self.control_url,
  272. on_message=self.on_control_message,
  273. on_error=self.on_error,
  274. on_close=self.on_close,
  275. on_open=self.on_control_open)
  276. self.control_ws_thread = threading.Thread(target=self.control_ws.run_forever)
  277. self.control_ws_thread.daemon = False
  278. self.control_ws_thread.start()
  279. # Connect to data WebSocket
  280. self.data_ws = websocket.WebSocketApp(self.data_url,
  281. on_message=self.on_data_message,
  282. on_error=self.on_error,
  283. on_close=self.on_close,
  284. on_open=self.on_data_open)
  285. self.data_ws_thread = threading.Thread(target=self.data_ws.run_forever)
  286. self.data_ws_thread.daemon = False
  287. self.data_ws_thread.start()
  288. # Wait for the connections to be established
  289. if not self.connection_established.wait(timeout=10):
  290. print("Timeout while connecting to the server.")
  291. return False
  292. if self.debug_mode:
  293. print("WebSocket connections established successfully.")
  294. return True
  295. except Exception as e:
  296. print(f"Error while connecting to the server: {e}")
  297. return False
  298. def start_server(self):
  299. args = ['stt-server']
  300. # Map constructor parameters to server arguments
  301. if self.model:
  302. args += ['--model', self.model]
  303. if self.realtime_model_type:
  304. args += ['--realtime_model_type', self.realtime_model_type]
  305. if self.language:
  306. args += ['--language', self.language]
  307. if self.silero_sensitivity is not None:
  308. args += ['--silero_sensitivity', str(self.silero_sensitivity)]
  309. if self.silero_use_onnx:
  310. args.append('--silero_use_onnx') # flag, no need for True/False
  311. if self.webrtc_sensitivity is not None:
  312. args += ['--webrtc_sensitivity', str(self.webrtc_sensitivity)]
  313. if self.min_length_of_recording is not None:
  314. args += ['--min_length_of_recording', str(self.min_length_of_recording)]
  315. if self.min_gap_between_recordings is not None:
  316. args += ['--min_gap_between_recordings', str(self.min_gap_between_recordings)]
  317. if self.realtime_processing_pause is not None:
  318. args += ['--realtime_processing_pause', str(self.realtime_processing_pause)]
  319. if self.early_transcription_on_silence is not None:
  320. args += ['--early_transcription_on_silence', str(self.early_transcription_on_silence)]
  321. if self.silero_deactivity_detection:
  322. args.append('--silero_deactivity_detection') # flag, no need for True/False
  323. if self.beam_size is not None:
  324. args += ['--beam_size', str(self.beam_size)]
  325. if self.beam_size_realtime is not None:
  326. args += ['--beam_size_realtime', str(self.beam_size_realtime)]
  327. if self.initial_prompt:
  328. args += ['--initial_prompt', self.initial_prompt]
  329. if self.wake_words is not None:
  330. args += ['--wake_words', str(self.wake_words)]
  331. if self.wake_words_sensitivity is not None:
  332. args += ['--wake_words_sensitivity', str(self.wake_words_sensitivity)]
  333. if self.wake_word_timeout is not None:
  334. args += ['--wake_word_timeout', str(self.wake_word_timeout)]
  335. if self.wake_word_activation_delay is not None:
  336. args += ['--wake_word_activation_delay', str(self.wake_word_activation_delay)]
  337. if self.wakeword_backend is not None:
  338. args += ['--wakeword_backend', str(self.wakeword_backend)]
  339. if self.openwakeword_model_paths:
  340. args += ['--openwakeword_model_paths', str(self.openwakeword_model_paths)]
  341. if self.openwakeword_inference_framework is not None:
  342. args += ['--openwakeword_inference_framework', str(self.openwakeword_inference_framework)]
  343. if self.wake_word_buffer_duration is not None:
  344. args += ['--wake_word_buffer_duration', str(self.wake_word_buffer_duration)]
  345. if self.use_main_model_for_realtime:
  346. args.append('--use_main_model_for_realtime') # flag, no need for True/False
  347. if self.use_extended_logging:
  348. args.append('--use_extended_logging') # flag, no need for True/False
  349. if self.control_url:
  350. parsed_control_url = urlparse(self.control_url)
  351. if parsed_control_url.port:
  352. args += ['--control_port', str(parsed_control_url.port)]
  353. if self.data_url:
  354. parsed_data_url = urlparse(self.data_url)
  355. if parsed_data_url.port:
  356. args += ['--data_port', str(parsed_data_url.port)]
  357. # Start the subprocess with the mapped arguments
  358. if os.name == 'nt': # Windows
  359. cmd = 'start /min cmd /c ' + subprocess.list2cmdline(args)
  360. # print(f"Opening server with cli command: {cmd}")
  361. subprocess.Popen(cmd, shell=True)
  362. else: # Unix-like systems
  363. subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True)
  364. print("STT server start command issued. Please wait a moment for it to initialize.", file=sys.stderr)
  365. def is_server_running(self):
  366. parsed_url = urlparse(self.control_url)
  367. host = parsed_url.hostname
  368. port = parsed_url.port or 80
  369. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  370. return s.connect_ex((host, port)) == 0
  371. def ensure_server_running(self):
  372. if not self.is_server_running():
  373. if self.debug_mode:
  374. print("STT server is not running.", file=sys.stderr)
  375. if self.autostart_server or self.ask_to_start_server():
  376. self.start_server()
  377. if self.debug_mode:
  378. print("Waiting for STT server to start...", file=sys.stderr)
  379. for _ in range(20): # Wait up to 20 seconds
  380. if self.is_server_running():
  381. if self.debug_mode:
  382. print("STT server started successfully.", file=sys.stderr)
  383. time.sleep(2) # Give the server a moment to fully initialize
  384. return True
  385. time.sleep(1)
  386. print("Failed to start STT server.", file=sys.stderr)
  387. return False
  388. else:
  389. print("STT server is required. Please start it manually.", file=sys.stderr)
  390. return False
  391. return True
  392. def start_recording(self):
  393. self.recording_thread = threading.Thread(target=self.record_and_send_audio)
  394. self.recording_thread.daemon = False
  395. self.recording_thread.start()
  396. def setup_audio(self):
  397. try:
  398. self.audio_interface = pyaudio.PyAudio()
  399. self.input_device_index = None
  400. try:
  401. default_device = self.audio_interface.get_default_input_device_info()
  402. self.input_device_index = default_device['index']
  403. except OSError as e:
  404. print(f"No default input device found: {e}")
  405. return False
  406. self.device_sample_rate = 16000 # Try 16000 Hz first
  407. try:
  408. self.stream = self.audio_interface.open(
  409. format=FORMAT,
  410. channels=CHANNELS,
  411. rate=self.device_sample_rate,
  412. input=True,
  413. frames_per_buffer=CHUNK,
  414. input_device_index=self.input_device_index,
  415. )
  416. if self.debug_mode:
  417. print(f"Audio recording initialized successfully at {self.device_sample_rate} Hz")
  418. return True
  419. except Exception as e:
  420. print(f"Failed to initialize audio stream at {self.device_sample_rate} Hz: {e}")
  421. return False
  422. except Exception as e:
  423. print(f"Error initializing audio recording: {e}")
  424. if self.audio_interface:
  425. self.audio_interface.terminate()
  426. return False
  427. def record_and_send_audio(self):
  428. try:
  429. if not self.setup_audio():
  430. raise Exception("Failed to set up audio recording.")
  431. if self.debug_mode:
  432. print("Recording and sending audio...")
  433. while self.is_running:
  434. if self.muted:
  435. time.sleep(0.01)
  436. continue
  437. try:
  438. audio_data = self.stream.read(CHUNK)
  439. if self.on_recorded_chunk:
  440. self.on_recorded_chunk(audio_data)
  441. if self.muted:
  442. continue
  443. if self.recording_start.is_set():
  444. metadata = {"sampleRate": self.device_sample_rate}
  445. metadata_json = json.dumps(metadata)
  446. metadata_length = len(metadata_json)
  447. message = struct.pack('<I', metadata_length) + metadata_json.encode('utf-8') + audio_data
  448. if self.is_running:
  449. if log_outgoing_chunks:
  450. print(".", flush=True, end='')
  451. self.data_ws.send(message, opcode=websocket.ABNF.OPCODE_BINARY)
  452. except KeyboardInterrupt: # handle manual interruption (Ctrl+C)
  453. if self.debug_mode:
  454. print("KeyboardInterrupt in record_and_send_audio, exiting...")
  455. break
  456. except Exception as e:
  457. print(f"Error sending audio data: {e}")
  458. break # Exit the recording loop
  459. except Exception as e:
  460. print(f"Error in record_and_send_audio: {e}")
  461. finally:
  462. self.cleanup_audio()
  463. def cleanup_audio(self):
  464. try:
  465. if self.stream:
  466. self.stream.stop_stream()
  467. self.stream.close()
  468. self.stream = None
  469. if self.audio_interface:
  470. self.audio_interface.terminate()
  471. self.audio_interface = None
  472. except Exception as e:
  473. print(f"Error cleaning up audio resources: {e}")
  474. def on_control_message(self, ws, message):
  475. try:
  476. data = json.loads(message)
  477. # Handle server response with status
  478. if 'status' in data:
  479. if data['status'] == 'success':
  480. if 'parameter' in data and 'value' in data:
  481. request_id = data.get('request_id')
  482. if request_id is not None and request_id in self.pending_requests:
  483. if self.debug_mode:
  484. print(f"Parameter {data['parameter']} = {data['value']}")
  485. self.pending_requests[request_id]['value'] = data['value']
  486. self.pending_requests[request_id]['event'].set()
  487. elif data['status'] == 'error':
  488. print(f"Server Error: {data.get('message', '')}")
  489. else:
  490. print(f"Unknown control message format: {data}")
  491. except json.JSONDecodeError:
  492. print(f"Received non-JSON control message: {message}")
  493. except Exception as e:
  494. print(f"Error processing control message: {e}")
  495. # Handle real-time transcription and full sentence updates
  496. def on_data_message(self, ws, message):
  497. try:
  498. data = json.loads(message)
  499. # Handle real-time transcription updates
  500. if data.get('type') == 'realtime':
  501. if data['text'] != self.realtime_text:
  502. self.realtime_text = data['text']
  503. # Handle full sentences
  504. elif data.get('type') == 'fullSentence':
  505. self.final_text = data['text']
  506. self.final_text_ready.set()
  507. elif data.get('type') == 'recording_start':
  508. if self.on_recording_start:
  509. self.on_recording_start()
  510. elif data.get('type') == 'recording_stop':
  511. if self.on_recording_stop:
  512. self.on_recording_stop()
  513. elif data.get('type') == 'transcription_start':
  514. if self.on_transcription_start:
  515. self.on_transcription_start()
  516. elif data.get('type') == 'vad_detect_start':
  517. if self.on_vad_detect_start:
  518. self.on_vad_detect_start()
  519. elif data.get('type') == 'vad_detect_stop':
  520. if self.on_vad_detect_stop:
  521. self.on_vad_detect_stop()
  522. elif data.get('type') == 'wakeword_detected':
  523. if self.on_wakeword_detected:
  524. self.on_wakeword_detected()
  525. elif data.get('type') == 'wakeword_detection_start':
  526. if self.on_wakeword_detection_start:
  527. self.on_wakeword_detection_start()
  528. elif data.get('type') == 'wakeword_detection_end':
  529. if self.on_wakeword_detection_end:
  530. self.on_wakeword_detection_end()
  531. elif data.get('type') == 'recorded_chunk':
  532. pass
  533. else:
  534. print(f"Unknown data message format: {data}")
  535. except json.JSONDecodeError:
  536. print(f"Received non-JSON data message: {message}")
  537. except Exception as e:
  538. print(f"Error processing data message: {e}")
  539. def on_error(self, ws, error):
  540. print(f"WebSocket error: {error}")
  541. def on_close(self, ws, close_status_code, close_msg):
  542. if self.debug_mode:
  543. if ws == self.data_ws:
  544. print(f"Data WebSocket connection closed: {close_status_code} - {close_msg}")
  545. elif ws == self.control_ws:
  546. print(f"Control WebSocket connection closed: {close_status_code} - {close_msg}")
  547. self.is_running = False
  548. def on_control_open(self, ws):
  549. if self.debug_mode:
  550. print("Control WebSocket connection opened.")
  551. self.connection_established.set()
  552. def on_data_open(self, ws):
  553. if self.debug_mode:
  554. print("Data WebSocket connection opened.")
  555. def set_parameter(self, parameter, value):
  556. command = {
  557. "command": "set_parameter",
  558. "parameter": parameter,
  559. "value": value
  560. }
  561. self.control_ws.send(json.dumps(command))
  562. def get_parameter(self, parameter):
  563. # Generate a unique request_id
  564. request_id = self.request_counter
  565. self.request_counter += 1
  566. # Prepare the command with the request_id
  567. command = {
  568. "command": "get_parameter",
  569. "parameter": parameter,
  570. "request_id": request_id
  571. }
  572. # Create an event to wait for the response
  573. event = threading.Event()
  574. self.pending_requests[request_id] = {'event': event, 'value': None}
  575. # Send the command to the server
  576. self.control_ws.send(json.dumps(command))
  577. # Wait for the response or timeout after 5 seconds
  578. if event.wait(timeout=5):
  579. value = self.pending_requests[request_id]['value']
  580. # Clean up the pending request
  581. del self.pending_requests[request_id]
  582. return value
  583. else:
  584. print(f"Timeout waiting for get_parameter {parameter}")
  585. # Clean up the pending request
  586. del self.pending_requests[request_id]
  587. return None
  588. def call_method(self, method, args=None, kwargs=None):
  589. command = {
  590. "command": "call_method",
  591. "method": method,
  592. "args": args or [],
  593. "kwargs": kwargs or {}
  594. }
  595. self.control_ws.send(json.dumps(command))
  596. def shutdown(self):
  597. self.is_running = False
  598. #self.stop_event.set()
  599. if self.control_ws:
  600. self.control_ws.close()
  601. if self.data_ws:
  602. self.data_ws.close()
  603. # Join threads to ensure they finish before exiting
  604. if self.control_ws_thread:
  605. self.control_ws_thread.join()
  606. if self.data_ws_thread:
  607. self.data_ws_thread.join()
  608. if self.recording_thread:
  609. self.recording_thread.join()
  610. # Clean up audio resources
  611. if self.stream:
  612. self.stream.stop_stream()
  613. self.stream.close()
  614. if self.audio_interface:
  615. self.audio_interface.terminate()
  616. def __enter__(self):
  617. """
  618. Method to setup the context manager protocol.
  619. This enables the instance to be used in a `with` statement, ensuring
  620. proper resource management. When the `with` block is entered, this
  621. method is automatically called.
  622. Returns:
  623. self: The current instance of the class.
  624. """
  625. return self
  626. def __exit__(self, exc_type, exc_value, traceback):
  627. """
  628. Method to define behavior when the context manager protocol exits.
  629. This is called when exiting the `with` block and ensures that any
  630. necessary cleanup or resource release processes are executed, such as
  631. shutting down the system properly.
  632. Args:
  633. exc_type (Exception or None): The type of the exception that
  634. caused the context to be exited, if any.
  635. exc_value (Exception or None): The exception instance that caused
  636. the context to be exited, if any.
  637. traceback (Traceback or None): The traceback corresponding to the
  638. exception, if any.
  639. """
  640. self.shutdown()