index.html 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. <!DOCTYPE html>
  2. <html lang="en">
  3. <head>
  4. <meta charset="UTF-8">
  5. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6. <title>Browser STT Client</title>
  7. <style>
  /* ---------- Page ---------- */

  /* The body is a full-viewport flex box that centers the app on both axes. */
  body {
    margin: 0;
    height: 100vh;
    display: flex;
    justify-content: center;
    align-items: center;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    color: #333;
    background-color: #f4f4f9;
  }

  /* Single centered column holding the status line, buttons and text boxes. */
  #container {
    box-sizing: border-box;
    width: 100%;
    max-width: 700px;
    height: 90%; /* fixed share of the viewport so the layout does not shift */
    padding: 20px;
    display: flex;
    flex-direction: column;
    align-items: center;
    gap: 20px; /* vertical spacing between the stacked items */
  }

  #status {
    font-size: 20px;
    text-align: center;
    color: #0056b3;
  }

  /* ---------- Transcript boxes ---------- */

  /* Shared look of both scrollable boxes; only their heights differ (below). */
  #transcriptionContainer,
  #fullTextContainer {
    box-sizing: border-box;
    width: 100%;
    padding: 10px;
    overflow-y: auto;
    border: 1px solid #ddd;
    border-radius: 5px;
    background-color: #f9f9f9;
  }

  /* Fixed heights keep the page from jumping as text streams in. */
  #transcriptionContainer {
    height: 90px;
  }

  #fullTextContainer {
    height: 150px;
  }

  /* Live (in-progress) transcription text. */
  #transcription {
    font-size: 18px;
    line-height: 1.6;
    word-wrap: break-word;
    color: #333;
  }

  /* Accumulated final sentences, shown in green. */
  #fullText {
    font-size: 18px;
    font-weight: 600;
    word-wrap: break-word;
    color: #4CAF50;
  }

  /* Highlight for the most recent word of the live transcription. */
  .last-word {
    font-weight: 600;
    color: #007bff;
  }

  /* ---------- Buttons ---------- */

  button {
    margin: 5px;
    padding: 12px 24px;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    color: #fff;
    background-color: #0056b3;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    cursor: pointer;
    transition: background-color 0.3s ease;
  }

  button:hover {
    background-color: #007bff;
  }

  /* Declared after :hover so a disabled button stays grey when hovered. */
  button:disabled {
    background-color: #cccccc;
    cursor: not-allowed;
  }
  89. </style>
  90. </head>
  91. <body>
  <!-- App shell: status line, record controls, live text and accumulated text -->
  <div id="container">
    <!-- Status line; the script overwrites its text as the session progresses -->
    <div id="status">Press "Start Recording"...</div>

    <!-- Controls; the inline handlers call functions defined in the script below -->
    <button id="startButton" onclick="startRecording()">Start Recording</button>
    <button id="stopButton" onclick="stopRecording()" disabled>Stop Recording</button>

    <!-- Live transcription (scrollable box) -->
    <div id="transcriptionContainer">
      <div id="transcription" class="realtime"></div>
    </div>

    <!-- Accumulated final sentences (scrollable box) -->
    <div id="fullTextContainer">
      <div id="fullText"></div>
    </div>
  </div>
  103. <script>
  // DOM handles, looked up once by the ids used in the page markup.
  const [statusDiv, transcriptionDiv, fullTextDiv, startButton, stopButton] = [
    "status",
    "transcription",
    "fullText",
    "startButton",
    "stopButton",
  ].map((id) => document.getElementById(id));

  // STT server endpoints. Only dataURL is used by this script; controlURL is
  // declared but not referenced anywhere below.
  const controlURL = "ws://127.0.0.1:8011";
  const dataURL = "ws://127.0.0.1:8012";

  // Per-session state. dataSocket is assigned by connectToDataSocket(); the
  // audio objects are assigned by startRecording().
  let dataSocket;
  let audioContext;
  let mediaStream;
  let mediaProcessor;
  /**
   * Opens the data WebSocket (`dataURL`), stores it in `dataSocket` and wires
   * up its event handlers.
   *
   * Incoming messages are parsed as JSON objects of the form `{ type, text }`:
   *  - `"realtime"`     replaces the live transcription line; the last word is
   *                     wrapped in a `.last-word` span (styled blue and bold).
   *  - `"fullSentence"` appends the sentence to the accumulated transcript and
   *                     shows it in the live line.
   * Other message types are ignored. A known type without a string `text` is
   * logged and ignored.
   *
   * Server text is always inserted as plain text, never parsed as HTML, so
   * markup inside a transcription cannot inject elements or scripts.
   */
  function connectToDataSocket() {
    const socket = new WebSocket(dataURL);
    dataSocket = socket;

    // Set when an error is reported, so the close event that follows it does
    // not replace the error message in the status line.
    let hasErrored = false;

    // A socket from an earlier session can still emit events after a newer one
    // has been opened (or after it was dropped); only the socket currently
    // stored in dataSocket may update the status line.
    const isCurrent = () => dataSocket === socket;

    socket.onopen = () => {
      if (isCurrent()) {
        statusDiv.textContent = "Connected to STT server.";
      }
      console.log("Connected to data WebSocket.");
    };

    socket.onmessage = (event) => {
      let message;
      try {
        message = JSON.parse(event.data);
      } catch (e) {
        console.error("Error parsing message:", e);
        return;
      }
      // JSON.parse can legitimately return null or a primitive.
      if (message === null || typeof message !== "object") {
        return;
      }

      const { type, text } = message;
      if (type !== "realtime" && type !== "fullSentence") {
        return;
      }
      if (typeof text !== "string") {
        console.error("Ignoring message without text:", message);
        return;
      }

      if (type === "realtime") {
        // Live line: everything but the last word as plain text, followed by
        // the last word in a highlighted span.
        const words = text.split(" ");
        const lastWord = words.pop();
        const lastWordSpan = document.createElement("span");
        lastWordSpan.className = "last-word";
        lastWordSpan.textContent = lastWord;
        // Assigning textContent also removes the previous span.
        transcriptionDiv.textContent = `${words.join(" ")} `;
        transcriptionDiv.appendChild(lastWordSpan);

        // Keep the newest text visible.
        const transcriptionContainer = document.getElementById("transcriptionContainer");
        transcriptionContainer.scrollTop = transcriptionContainer.scrollHeight;
      } else {
        // Final sentence: append a text node instead of rewriting the whole
        // transcript, then mirror the sentence in the live line.
        fullTextDiv.appendChild(document.createTextNode(`${text} `));
        transcriptionDiv.textContent = text;

        const fullTextContainer = document.getElementById("fullTextContainer");
        fullTextContainer.scrollTop = fullTextContainer.scrollHeight;
      }
    };

    socket.onclose = () => {
      if (isCurrent() && !hasErrored) {
        statusDiv.textContent = "Disconnected from STT server.";
      }
    };

    socket.onerror = (error) => {
      hasErrored = true;
      console.error("WebSocket error:", error);
      if (isCurrent()) {
        statusDiv.textContent = "Error connecting to the STT server.";
      }
    };
  }
  /**
   * Starts a capture session: switches the UI into the recording state, clears
   * both text areas, opens the microphone, routes its audio through a
   * ScriptProcessorNode that forwards every buffer to sendAudioChunk(), and
   * finally opens the data WebSocket.
   *
   * Any failure is logged, the session is torn down with stopRecording(), and
   * "Error accessing microphone." is shown in the status line.
   *
   * NOTE: createScriptProcessor() is deprecated in the Web Audio API in favour
   * of AudioWorklet; it is kept here to preserve the existing behaviour.
   *
   * @returns {Promise<void>} Settles once capture is running, was abandoned, or
   *   the failure has been reported.
   */
  async function startRecording() {
    try {
      startButton.disabled = true;
      stopButton.disabled = false;
      statusDiv.textContent = "Recording...";
      transcriptionDiv.textContent = "";
      fullTextDiv.textContent = "";

      const context = new AudioContext();
      audioContext = context;

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // The permission prompt can stay open for a long time. If Stop was
      // pressed meanwhile (stopRecording() re-enables the start button) or a
      // newer session replaced this one, release the microphone and bail out
      // instead of starting capture behind a UI that says "stopped".
      if (!startButton.disabled || audioContext !== context) {
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      mediaStream = stream;

      const input = context.createMediaStreamSource(stream);

      // 1024-frame buffers, one input channel, one output channel.
      const processor = context.createScriptProcessor(1024, 1, 1);
      mediaProcessor = processor;

      // Read the rate once from this session's context rather than from the
      // mutable global on every audio callback.
      const { sampleRate } = context;
      processor.onaudioprocess = (event) => {
        sendAudioChunk(event.inputBuffer.getChannelData(0), sampleRate);
      };

      input.connect(processor);
      // NOTE(review): the handler never writes to the output buffer, so this
      // connection is presumably only there to keep onaudioprocess firing.
      processor.connect(context.destination);

      connectToDataSocket();
    } catch (error) {
      console.error("Error accessing microphone:", error);
      // stopRecording() writes its own "Stopped recording." status, so clean
      // up first and report the failure afterwards; otherwise the error
      // message would be overwritten immediately.
      stopRecording();
      statusDiv.textContent = "Error accessing microphone.";
    }
  }
  /**
   * Ends the current capture session and resets the UI to its idle state.
   *
   * Each resource (script processor, audio context, microphone stream, data
   * socket) is released independently and its reference is cleared, so the
   * function is safe to call after a partially completed start (for example
   * when microphone access was denied after the audio context was created)
   * and safe to call more than once.
   */
  function stopRecording() {
    if (mediaProcessor) {
      // Remove the handler first so no further audio callbacks run once the
      // session state below has been cleared.
      mediaProcessor.onaudioprocess = null;
      mediaProcessor.disconnect();
      mediaProcessor = null;
    }

    if (audioContext) {
      // close() returns a promise and rejects on an already closed context;
      // skip that case and never leave a rejection unhandled.
      if (audioContext.state !== "closed") {
        audioContext.close().catch((error) => {
          console.error("Error closing audio context:", error);
        });
      }
      audioContext = null;
    }

    if (mediaStream) {
      // Stopping every track is what actually releases the microphone.
      mediaStream.getTracks().forEach((track) => track.stop());
      mediaStream = null;
    }

    if (dataSocket) {
      dataSocket.close();
      dataSocket = null;
    }

    startButton.disabled = false;
    stopButton.disabled = true;
    statusDiv.textContent = "Stopped recording.";
  }
  /**
   * Sends one chunk of audio to the STT server as a single binary frame.
   *
   * Frame layout:
   *   bytes 0..3      metadata length in bytes (uint32, little-endian)
   *   next N bytes    UTF-8 JSON metadata: {"sampleRate": <sampleRate>}
   *   remaining       samples as 16-bit signed PCM, little-endian
   *
   * The byte order is written explicitly; it is the same layout the previous
   * typed-array based code produced on little-endian hosts.
   *
   * The chunk is dropped silently when no data socket exists or it is not open.
   *
   * @param {Float32Array} audioData Samples in the range [-1, 1]; values
   *   outside that range are clamped.
   * @param {number} sampleRate Sample rate reported to the server in the
   *   metadata header.
   */
  function sendAudioChunk(audioData, sampleRate) {
    if (!dataSocket || dataSocket.readyState !== WebSocket.OPEN) {
      return;
    }

    const metadataBytes = new TextEncoder().encode(JSON.stringify({ sampleRate }));
    const headerBytes = Uint32Array.BYTES_PER_ELEMENT;
    const bytesPerSample = Int16Array.BYTES_PER_ELEMENT;
    const pcmOffset = headerBytes + metadataBytes.byteLength;

    const message = new Uint8Array(pcmOffset + audioData.length * bytesPerSample);
    const view = new DataView(message.buffer);

    // The length prefix must count encoded bytes, not string characters: the
    // two differ as soon as the metadata contains a non-ASCII character.
    view.setUint32(0, metadataBytes.byteLength, true);
    message.set(metadataBytes, headerBytes);

    for (let i = 0; i < audioData.length; i++) {
      const clamped = Math.max(-1, Math.min(1, audioData[i]));
      // Scale to the 16-bit range; setInt16 truncates the fractional part.
      view.setInt16(pcmOffset + i * bytesPerSample, clamped * 0x7fff, true);
    }

    dataSocket.send(message);
  }
  215. </script>
  216. </body>
  217. </html>