# openai_voice_interface.py
  1. import os
  2. import openai
  3. from RealtimeTTS import TextToAudioStream, AzureEngine
  4. from RealtimeSTT import AudioToTextRecorder
  5. if __name__ == '__main__':
  6. # Initialize OpenAI key
  7. openai.api_key = os.environ.get("OPENAI_API_KEY")
  8. # Text-to-Speech Stream Setup
  9. stream = TextToAudioStream(
  10. # Alternatives: SystemEngine or ElevenlabsEngine
  11. AzureEngine(
  12. os.environ.get("AZURE_SPEECH_KEY"),
  13. os.environ.get("AZURE_SPEECH_REGION"),
  14. ),
  15. log_characters=True
  16. )
  17. # Speech-to-Text Recorder Setup
  18. recorder = AudioToTextRecorder(
  19. model="medium",
  20. language="en",
  21. wake_words="Jarvis",
  22. spinner=True,
  23. wake_word_activation_delay=5
  24. )
  25. system_prompt_message = {
  26. 'role': 'system',
  27. 'content': 'Answer precise and short with the polite sarcasm of a butler.'
  28. }
  29. def generate_response(messages):
  30. """Generate assistant's response using OpenAI."""
  31. for chunk in openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages, stream=True):
  32. text_chunk = chunk["choices"][0]["delta"].get("content")
  33. if text_chunk:
  34. yield text_chunk
  35. history = []
  36. def main():
  37. """Main loop for interaction."""
  38. while True:
  39. # Capture user input from microphone
  40. user_text = recorder.text().strip()
  41. if not user_text:
  42. continue
  43. print(f'>>> {user_text}\n<<< ', end="", flush=True)
  44. history.append({'role': 'user', 'content': user_text})
  45. # Get assistant response and play it
  46. assistant_response = generate_response([system_prompt_message] + history[-10:])
  47. stream.feed(assistant_response).play()
  48. history.append({'role': 'assistant', 'content': stream.text()})
  49. if __name__ == "__main__":
  50. main()