123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- import os
- import openai
- from RealtimeTTS import TextToAudioStream, AzureEngine
- from RealtimeSTT import AudioToTextRecorder
if __name__ == '__main__':
    # Persona instruction that is prepended to every chat request.
    system_prompt_message = dict(
        role='system',
        content='Answer precise and short with the polite sarcasm of a butler.',
    )

    # OpenAI credentials are read from the environment (None when the variable is unset).
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    # Text-to-speech output stream backed by Azure.
    # Alternatives: SystemEngine or ElevenlabsEngine
    stream = TextToAudioStream(
        AzureEngine(
            os.environ.get("AZURE_SPEECH_KEY"),
            os.environ.get("AZURE_SPEECH_REGION"),
        ),
        log_characters=True,
    )

    # Speech-to-text recorder, gated by the "Jarvis" wake word.
    # NOTE(review): wake_word_activation_delay is presumably in seconds - confirm
    # against the RealtimeSTT documentation.
    recorder = AudioToTextRecorder(
        model="medium",
        language="en",
        wake_words="Jarvis",
        spinner=True,
        wake_word_activation_delay=5,
    )
def generate_response(messages):
    """Stream the assistant's reply from OpenAI as text fragments.

    Args:
        messages: Chat messages forwarded unchanged to
            ``openai.ChatCompletion.create``.

    Yields:
        Every non-empty ``content`` value found in the ``delta`` of the
        first choice of each streamed chunk.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        stream=True,
    )
    for event in completion:
        delta = event["choices"][0]["delta"]
        fragment = delta.get("content")
        # Chunks whose delta has no (or empty) content are skipped.
        if not fragment:
            continue
        yield fragment
# Number of most recent chat messages kept and sent as context on each turn.
MAX_HISTORY_MESSAGES = 10

# Conversation log of {'role': ..., 'content': ...} dicts, oldest first.
history = []


def main():
    """Run the interaction loop: listen, query the model, speak the reply.

    Loops forever. Each pass takes one transcription from ``recorder``,
    skips it when it is blank, echoes it to stdout, and appends it to
    ``history``. The system prompt plus the last ``MAX_HISTORY_MESSAGES``
    history entries are then passed to ``generate_response``, whose output
    is fed to ``stream`` and played. The text reported by ``stream.text()``
    is stored as the assistant's turn.
    """
    while True:
        # Capture user input from microphone
        user_text = recorder.text().strip()
        if not user_text:
            continue

        print(f'>>> {user_text}\n<<< ', end="", flush=True)
        history.append({'role': 'user', 'content': user_text})

        # Get assistant response and play it
        context = [system_prompt_message] + history[-MAX_HISTORY_MESSAGES:]
        assistant_response = generate_response(context)
        stream.feed(assistant_response).play()
        history.append({'role': 'assistant', 'content': stream.text()})

        # Only the newest MAX_HISTORY_MESSAGES entries are ever sent, so older
        # ones are dropped in place; otherwise the list would grow without
        # bound for as long as the assistant keeps running.
        del history[:-MAX_HISTORY_MESSAGES]
# Script entry point: start the interaction loop only when run directly.
if __name__ == "__main__":
    main()
|