Browse Source

fix for mac qsize() problem

Kolja Beigel 1 year ago
parent
commit
9be4762c80
3 changed files with 25 additions and 17 deletions
  1. 1 1
      README.md
  2. 22 14
      RealtimeSTT/audio_recorder.py
  3. 2 2
      requirements.txt

+ 1 - 1
README.md

@@ -127,7 +127,7 @@ To use RealtimeSTT with GPU support via CUDA please follow these steps:
 4. **Install PyTorch with CUDA support**:
     ```bash
     pip uninstall torch
-    pip install torch==2.0.1+cu118 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
+    pip install torch==2.2.2+cu118 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu118
     ```
 
 ## Quick Start

+ 22 - 14
RealtimeSTT/audio_recorder.py

@@ -27,6 +27,7 @@ Author: Kolja Beigel
 """
 
 import torch.multiprocessing as mp
+import torch
 from typing import List, Union
 import faster_whisper
 import collections
@@ -36,10 +37,10 @@ import traceback
 import threading
 import webrtcvad
 import itertools
+import platform
 import pyaudio
 import logging
 import struct
-import torch
 import halo
 import time
 import os
@@ -65,6 +66,10 @@ SAMPLE_RATE = 16000
 BUFFER_SIZE = 512
 INT16_MAX_ABS_VALUE = 32768.0
 
+INIT_HANDLE_BUFFER_OVERFLOW = False
+if platform.system() != 'Darwin':
+    INIT_HANDLE_BUFFER_OVERFLOW = True
+
 
 class AudioToTextRecorder:
     """
@@ -126,7 +131,8 @@ class AudioToTextRecorder:
                  on_wakeword_detection_start=None,
                  on_wakeword_detection_end=None,
                  on_recorded_chunk=None,
-                 debug_mode=False
+                 debug_mode=False,
+                 handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW,
                  ):
         """
         Initializes an audio recorder and transcription
@@ -253,12 +259,13 @@ class AudioToTextRecorder:
             with the recorded audio chunk as its argument.
         - debug_mode (bool, default=False): If set to True, the system will
             print additional debug information to the console.
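+        - handle_buffer_overflow (bool, default=True, except on macOS
+            where it defaults to False): If set to True, the system will
+            log a warning when the audio queue size exceeds the allowed
+            latency limit during recording and discard old audio chunks.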
 
         Raises:
             Exception: Errors related to initializing transcription
             model, wake word detection, or audio recording.
         """
-
         self.language = language
         self.compute_type = compute_type
         self.input_device_index = input_device_index
@@ -297,6 +304,7 @@ class AudioToTextRecorder:
             on_realtime_transcription_stabilized
         )
         self.debug_mode = debug_mode
+        self.handle_buffer_overflow = handle_buffer_overflow
         self.allowed_latency_limit = ALLOWED_LATENCY_LIMIT
 
         self.level = level
@@ -988,20 +996,20 @@ class AudioToTextRecorder:
                     if self.on_recorded_chunk:
                         self.on_recorded_chunk(data)
 
-                    # Handle queue overflow
-                    queue_overflow_logged = False
-
-                    while (self.audio_queue.qsize() >
-                           self.allowed_latency_limit):
-
-                        if not queue_overflow_logged:
-                            logging.warning("Audio queue size exceeds latency "
-                                            "limit. Current size: "
+                    if self.handle_buffer_overflow:
+                        # Handle queue overflow
+                        if (self.audio_queue.qsize() >
+                                self.allowed_latency_limit):
+                            logging.warning("Audio queue size exceeds "
+                                            "latency limit. Current size: "
                                             f"{self.audio_queue.qsize()}. "
                                             "Discarding old audio chunks."
                                             )
-                            queue_overflow_logged = True
-                        data = self.audio_queue.get()
+
+                        while (self.audio_queue.qsize() >
+                                self.allowed_latency_limit):
+
+                            data = self.audio_queue.get()
 
                 except BrokenPipeError:
                     print("BrokenPipeError _recording_worker")

+ 2 - 2
requirements.txt

@@ -3,5 +3,5 @@ faster-whisper==1.0.1
 pvporcupine==1.9.5
 webrtcvad==2.0.10
 halo==0.0.31
-torch==2.1.2
-torchaudio==2.1.2
+torch==2.2.2
+torchaudio==2.2.2