瀏覽代碼

whisper v3, browser support

Kolja Beigel 1 年之前
父節點
當前提交
f2c6053057
共有 3 個文件被更改,包括 67 次插入和 6 次删除
  1. 54 2
      example_browserclient/client.js
  2. 10 1
      example_browserclient/index.html
  3. 3 3
      example_browserclient/server.py

+ 54 - 2
example_browserclient/client.js

@@ -1,10 +1,37 @@
 let socket = new WebSocket("ws://localhost:9001");
-
+let displayDiv = document.getElementById('textDisplay');
+let server_available = false;
+let mic_available = false;
 let fullSentences = [];
 
+const serverCheckInterval = 5000; // Check every 5 seconds
+
+function connectToServer() {
+    socket = new WebSocket("ws://localhost:9001");
+
+    socket.onopen = function(event) {
+        server_available = true;
+        start_msg();
+    };
+
+    socket.onmessage = function(event) {
+        let data = JSON.parse(event.data);
+
+        if (data.type === 'realtime') {
+            displayRealtimeText(data.text, displayDiv);
+        } else if (data.type === 'fullSentence') {
+            fullSentences.push(data.text);
+            displayRealtimeText("", displayDiv); // Refresh display with new full sentence
+        }
+    };
+
+    socket.onclose = function(event) {
+        server_available = false;
+    };
+}
+
 socket.onmessage = function(event) {
     let data = JSON.parse(event.data);
-    let displayDiv = document.getElementById('textDisplay');
 
     if (data.type === 'realtime') {
         displayRealtimeText(data.text, displayDiv);
@@ -25,6 +52,29 @@ function displayRealtimeText(realtimeText, displayDiv) {
     displayDiv.innerHTML = displayedText;
 }
 
+function start_msg() {
+    if (!mic_available)
+        displayRealtimeText("🎤  please allow microphone access  🎤", displayDiv);
+    else if (!server_available)
+        displayRealtimeText("🖥️  please start server  🖥️", displayDiv);
+    else
+        displayRealtimeText("👄  start speaking  👄", displayDiv);
+};
+
+// Check server availability periodically
+setInterval(() => {
+    if (!server_available) {
+        connectToServer();
+    }
+}, serverCheckInterval);
+
+start_msg()
+
+socket.onopen = function(event) {
+    server_available = true;
+    start_msg()
+};
+
 // Request access to the microphone
 navigator.mediaDevices.getUserMedia({ audio: true })
 .then(stream => {
@@ -34,6 +84,8 @@ navigator.mediaDevices.getUserMedia({ audio: true })
 
     source.connect(processor);
     processor.connect(audioContext.destination);
+    mic_available = true;
+    start_msg()
 
     processor.onaudioprocess = function(e) {
         let inputData = e.inputBuffer.getChannelData(0);

+ 10 - 1
example_browserclient/index.html

@@ -2,12 +2,21 @@
 <html>
 <head>
     <title>Audio Streamer</title>
+    <meta charset="UTF-8">
     <script src="https://cdn.socket.io/4.0.0/socket.io.min.js"></script>
     <style>
+        html, body {
+            height: 100%;
+            margin: 0;
+        }
         body {
             background-color: black;
             color: white;
             font-family: Arial, sans-serif;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            text-align: center;
         }
         .text-display {
             white-space: pre-wrap; /* Preserves spaces and line breaks */
@@ -22,7 +31,7 @@
     </style>
 </head>
 <body>
-    <div id="textDisplay" class="text-display"></div>
+    <div id="textDisplay" style="max-width: 800px; margin: auto;">
     <script src="client.js"></script>
 </body>
 </html>

+ 3 - 3
example_browserclient/server.py

@@ -26,7 +26,7 @@ if __name__ == '__main__':
                 })
             )
         )
-        print(f"{text}", flush=True, end='')
+        print(f"\r{text}", flush=True, end='')
 
     recorder_config = {
         'spinner': False,
@@ -35,7 +35,7 @@ if __name__ == '__main__':
         'language': 'en',
         'silero_sensitivity': 0.4,
         'webrtc_sensitivity': 2,
-        'post_speech_silence_duration': 1.0,
+        'post_speech_silence_duration': 0.7,
         'min_length_of_recording': 0,
         'min_gap_between_recordings': 0,
         'enable_realtime_transcription': True,
@@ -60,7 +60,7 @@ if __name__ == '__main__':
                     })
                 )
             )
-            print(f"{full_sentence}")
+            print(f"\rSentence: {full_sentence}")
 
     def decode_and_resample(
             audio_data,