openai_voice_interface.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import os
  2. import openai
  3. from RealtimeTTS import TextToAudioStream, AzureEngine
  4. from RealtimeSTT import AudioToTextRecorder
# Initialize OpenAI key
# NOTE(review): module-level `openai.api_key` assignment is the pre-1.0 openai
# SDK style — consistent with the openai.ChatCompletion call below; confirm
# the installed openai version is < 1.0.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Text-to-Speech Stream Setup
stream = TextToAudioStream(
    # Alternatives: SystemEngine or ElevenlabsEngine
    AzureEngine(
        os.environ.get("AZURE_SPEECH_KEY"),
        "eastus",  # Azure Speech service region
    ),
    log_characters=True  # echo each synthesized character to stdout
)

# Speech-to-Text Recorder Setup
recorder = AudioToTextRecorder(
    model="medium",      # Whisper model size used for transcription
    language="en",
    wake_words="Jarvis",  # phrase that triggers recording
    spinner=True,
    wake_word_activation_delay=5  # seconds — presumably delay before wake-word mode engages; verify against RealtimeSTT docs
)

# Persona message prepended to every chat request in main()
system_prompt_message = {
    'role': 'system',
    'content': 'Answer precise and short with the polite sarcasm of a butler.'
}
  28. def generate_response(messages):
  29. """Generate assistant's response using OpenAI."""
  30. for chunk in openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages, stream=True):
  31. text_chunk = chunk["choices"][0]["delta"].get("content")
  32. if text_chunk:
  33. yield text_chunk
  34. history = []
  35. def main():
  36. """Main loop for interaction."""
  37. while True:
  38. # Capture user input from microphone
  39. user_text = recorder.text().strip()
  40. if not user_text:
  41. continue
  42. print(f'>>> {user_text}\n<<< ', end="", flush=True)
  43. history.append({'role': 'user', 'content': user_text})
  44. # Get assistant response and play it
  45. assistant_response = generate_response([system_prompt_message] + history[-10:])
  46. stream.feed(assistant_response).play()
  47. history.append({'role': 'assistant', 'content': stream.text()})
  48. if __name__ == "__main__":
  49. main()